Command that produces this log: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
----------------------------------------------------------------------------------------------------
> trainable params:
>>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024])
>>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024])
>>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024])
>>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024])
>>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024])
[... xlmr.encoder.layer.1 through xlmr.encoder.layer.23 repeat the same 16 entries per layer with identical shapes ...]
>>> xlmr.pooler.dense.weight: torch.Size([1024, 1024])
>>> xlmr.pooler.dense.bias: torch.Size([1024])
>>> trans_rep.weight: torch.Size([1024, 2048])
>>> trans_rep.bias: torch.Size([1024])
>>> hidden_ffns.Corruplate.layers.0.weight: torch.Size([768, 1024])
>>> hidden_ffns.Corruplate.layers.0.bias: torch.Size([768])
[... identical hidden_ffns entries for Cybercrimeplate, Disasterplate, Displacementplate, Epidemiplate, Etiplate, Protestplate, and Terrorplate ...]
>>> template_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768])
>>> template_classifiers.Corruplate.layers.0.bias: torch.Size([450])
>>> template_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450])
>>> template_classifiers.Corruplate.layers.1.bias: torch.Size([2])
[... identical template_classifiers entries for the other seven templates ...]
>>> type_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768])
>>> type_classifiers.Corruplate.layers.0.bias: torch.Size([450])
>>> type_classifiers.Corruplate.layers.1.weight: torch.Size([6, 450])
>>> type_classifiers.Corruplate.layers.1.bias: torch.Size([6])
[... identical type_classifiers entries for the other seven templates ...]
>>> completion_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768])
>>> completion_classifiers.Corruplate.layers.0.bias: torch.Size([450])
>>> completion_classifiers.Corruplate.layers.1.weight: torch.Size([4, 450])
>>> completion_classifiers.Corruplate.layers.1.bias: torch.Size([4])
[... identical completion_classifiers entries for the other seven templates ...]
>>> overtime_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768])
>>> overtime_classifiers.Corruplate.layers.0.bias: torch.Size([450])
>>> overtime_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450])
>>> overtime_classifiers.Corruplate.layers.1.bias: torch.Size([2])
[... identical overtime_classifiers entries for the other seven templates ...]
>>> coordinated_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768])
>>> coordinated_classifiers.Corruplate.layers.0.bias: torch.Size([450])
>>> coordinated_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450])
>>> coordinated_classifiers.Corruplate.layers.1.bias: torch.Size([2])
[... identical coordinated_classifiers entries for the other seven templates ...]
n_trainable_params: 582185936, n_nontrainable_params: 0
----------------------------------------------------------------------------------------------------
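A dump in this format is what iterating PyTorch's model.named_parameters() produces. The helper below is a minimal sketch of that pattern, not train.py's actual code; the function name and exact formatting are assumptions.

from torch import nn

def report_params(model: nn.Module) -> None:
    # Hypothetical helper: prints one line per parameter tensor, then the
    # totals, matching the format of the dump above.
    n_trainable = n_nontrainable = 0
    print("> trainable params:")
    for name, param in model.named_parameters():
        if param.requires_grad:
            print(f">>> {name}: {param.size()}")
            n_trainable += param.numel()
        else:
            n_nontrainable += param.numel()
    print(f"n_trainable_params: {n_trainable}, n_nontrainable_params: {n_nontrainable}")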
******************************
Epoch: 0
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-23 22:13:19.859437: step: 4/77, loss: 1.0444989204406738
2023-01-23 22:13:21.097636: step: 8/77, loss: 1.0512828826904297
2023-01-23 22:13:22.339934: step: 12/77, loss: 1.056790828704834
2023-01-23 22:13:23.662652: step: 16/77, loss: 1.0526840686798096
2023-01-23 22:13:24.937112: step: 20/77, loss: 1.0438315868377686
2023-01-23 22:13:26.197215: step: 24/77, loss: 1.0445082187652588
2023-01-23 22:13:27.461043: step: 28/77, loss: 1.0523055791854858
2023-01-23 22:13:28.765376: step: 32/77, loss: 1.043738603591919
2023-01-23 22:13:30.023326: step: 36/77, loss: 1.0327272415161133
2023-01-23 22:13:31.290739: step: 40/77, loss: 1.0247113704681396
2023-01-23 22:13:32.565712: step: 44/77, loss: 1.020169973373413
2023-01-23 22:13:33.864467: step: 48/77, loss: 1.012270212173462
2023-01-23 22:13:35.182575: step: 52/77, loss: 0.9989683628082275
2023-01-23 22:13:36.444992: step: 56/77, loss: 1.0067802667617798
2023-01-23 22:13:37.649880: step: 60/77, loss: 0.9830159544944763
2023-01-23 22:13:38.949048: step: 64/77, loss: 0.9617307782173157
2023-01-23 22:13:40.260955: step: 68/77, loss: 0.9592471718788147
2023-01-23 22:13:41.576698: step: 72/77, loss: 0.9552438855171204
2023-01-23 22:13:42.876566: step: 76/77, loss: 0.9411525726318359
2023-01-23 22:13:44.202620: step: 80/77, loss: 0.9311196804046631
2023-01-23 22:13:45.479635: step: 84/77, loss: 0.9200646877288818
2023-01-23 22:13:46.796472: step: 88/77, loss: 0.9144275188446045
2023-01-23 22:13:48.078820: step: 92/77, loss: 0.901231050491333
2023-01-23 22:13:49.362128: step: 96/77, loss: 0.8835911750793457
2023-01-23 22:13:50.644900: step: 100/77, loss: 0.8329275846481323
2023-01-23 22:13:51.930489: step: 104/77, loss: 0.8399503231048584
2023-01-23 22:13:53.151335: step: 108/77, loss: 0.8364051580429077
2023-01-23 22:13:54.404392: step: 112/77, loss: 0.8006849884986877
2023-01-23 22:13:55.688493: step: 116/77, loss: 0.7944232821464539
2023-01-23 22:13:56.987455: step: 120/77, loss: 0.7675794363021851
2023-01-23 22:13:58.260373: step: 124/77, loss: 0.7662374973297119
2023-01-23 22:13:59.602943: step: 128/77, loss: 0.7447980046272278
2023-01-23 22:14:00.914678: step: 132/77, loss: 0.6651491522789001
2023-01-23 22:14:02.243695: step: 136/77, loss: 0.6845536231994629
2023-01-23 22:14:03.567480: step: 140/77, loss: 0.6975764036178589
2023-01-23 22:14:04.881902: step: 144/77, loss: 0.632821798324585
2023-01-23 22:14:06.209721: step: 148/77, loss: 0.6046133041381836
2023-01-23 22:14:07.548763: step: 152/77, loss: 0.6029253005981445
2023-01-23 22:14:08.814287: step: 156/77, loss: 0.5973246097564697
2023-01-23 22:14:10.159621: step: 160/77, loss: 0.6111674308776855
2023-01-23 22:14:11.470710: step: 164/77, loss: 0.5706891417503357
2023-01-23 22:14:12.779930: step: 168/77, loss: 0.4798954129219055
2023-01-23 22:14:14.128062: step: 172/77, loss: 0.42601197957992554
2023-01-23 22:14:15.432963: step: 176/77, loss: 0.45769280195236206
2023-01-23 22:14:16.760600: step: 180/77, loss: 0.38394296169281006
2023-01-23 22:14:18.038506: step: 184/77, loss: 0.4356851577758789
2023-01-23 22:14:19.350956: step: 188/77, loss: 0.44299718737602234
2023-01-23 22:14:20.610085: step: 192/77, loss: 0.33653539419174194
2023-01-23 22:14:21.906858: step: 196/77, loss: 0.3349962830543518
2023-01-23 22:14:23.264619: step: 200/77, loss: 0.3130283057689667
2023-01-23 22:14:24.583517: step: 204/77, loss: 0.2645954191684723
2023-01-23 22:14:25.915898: step: 208/77, loss: 0.2651616930961609
2023-01-23 22:14:27.189828: step: 212/77, loss: 0.4297363758087158
2023-01-23 22:14:28.497774: step: 216/77, loss: 0.3081668019294739
2023-01-23 22:14:29.786842: step: 220/77, loss: 0.24094446003437042
2023-01-23 22:14:31.101922: step: 224/77, loss: 0.22433724999427795
2023-01-23 22:14:32.407129: step: 228/77, loss: 0.27115598320961
2023-01-23 22:14:33.771495: step: 232/77, loss: 0.1454138308763504
2023-01-23 22:14:34.986679: step: 236/77, loss: 0.19214697182178497
2023-01-23 22:14:36.289155: step: 240/77, loss: 0.30754104256629944
2023-01-23 22:14:37.568730: step: 244/77, loss: 0.16875210404396057
2023-01-23 22:14:38.869457: step: 248/77, loss: 0.18391621112823486
2023-01-23 22:14:40.166720: step: 252/77, loss: 0.18308106064796448
2023-01-23 22:14:41.472949: step: 256/77, loss: 0.287090539932251
2023-01-23 22:14:42.783446: step: 260/77, loss: 0.1613195836544037
2023-01-23 22:14:44.065389: step: 264/77, loss: 0.23789790272712708
2023-01-23 22:14:45.353249: step: 268/77, loss: 0.15599499642848969
2023-01-23 22:14:46.633025: step: 272/77, loss: 0.05770568549633026
2023-01-23 22:14:47.974830: step: 276/77, loss: 0.11479754745960236
2023-01-23 22:14:49.305381: step: 280/77, loss: 0.0677362009882927
2023-01-23 22:14:50.610128: step: 284/77, loss: 0.1197415217757225
2023-01-23 22:14:51.911098: step: 288/77, loss: 0.06359511613845825
2023-01-23 22:14:53.234894: step: 292/77, loss: 0.084564208984375
2023-01-23 22:14:54.542012: step: 296/77, loss: 0.390523225069046
2023-01-23 22:14:55.829080: step: 300/77, loss: 0.0934390053153038
2023-01-23 22:14:57.161241: step: 304/77, loss: 0.18686211109161377
2023-01-23 22:14:58.496244: step: 308/77, loss: 0.052013661712408066
2023-01-23 22:14:59.799951: step: 312/77, loss: 0.08927802741527557
2023-01-23 22:15:01.070305: step: 316/77, loss: 0.05229855701327324
2023-01-23 22:15:02.392235: step: 320/77, loss: 0.07221616804599762
2023-01-23 22:15:03.684990: step: 324/77, loss: 0.14579658210277557
2023-01-23 22:15:04.964207: step: 328/77, loss: 0.06311936676502228
2023-01-23 22:15:06.259378: step: 332/77, loss: 0.04793437197804451
2023-01-23 22:15:07.581103: step: 336/77, loss: 0.038324177265167236
2023-01-23 22:15:08.879756: step: 340/77, loss: 0.06017142906785011
2023-01-23 22:15:10.177990: step: 344/77, loss: 0.0927521139383316
2023-01-23 22:15:11.512725: step: 348/77, loss: 0.05052327364683151
2023-01-23 22:15:12.839861: step: 352/77, loss: 0.10013939440250397
2023-01-23 22:15:14.116458: step: 356/77, loss: 0.16297364234924316
2023-01-23 22:15:15.387183: step: 360/77, loss: 0.053793832659721375
2023-01-23 22:15:16.667537: step: 364/77, loss: 0.11682991683483124
2023-01-23 22:15:17.947897: step: 368/77, loss: 0.2339695543050766
2023-01-23 22:15:19.237566: step: 372/77, loss: 0.13662782311439514
2023-01-23 22:15:20.522937: step: 376/77, loss: 0.029554195702075958
2023-01-23 22:15:21.836126: step: 380/77, loss: 0.08953571319580078
2023-01-23 22:15:23.170888: step: 384/77, loss: 0.14427034556865692
2023-01-23 22:15:24.441436: step: 388/77, loss: 0.09134456515312195
==================================================
Loss: 0.481
--------------------
Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
New best chinese model...
New best korean model...
New best russian model...
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
--------------------
Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
--------------------
Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
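A note on the step lines above: with --batch_size 10 and --accumulate_step 4, the trainer evidently accumulates gradients over 4 mini-batches (an effective batch of 40) and prints one timestamped loss per optimizer update, which is why the counter advances in increments of 4. The loop below is a minimal sketch of that pattern under those assumptions, not the repository's actual code; note that in the real log the step numbers outrun the printed "/77" total, so the denominator presumably tracks something this sketch does not capture.

import datetime

ACCUMULATE_STEP = 4  # from --accumulate_step 4

def train_one_epoch(model, dataloader, optimizer):
    # Hypothetical gradient-accumulation loop; the model is assumed to
    # return its scalar loss directly.
    model.train()
    optimizer.zero_grad()
    total_loss, n_batches = 0.0, 0
    for i, batch in enumerate(dataloader, start=1):
        loss = model(**batch)
        (loss / ACCUMULATE_STEP).backward()  # scale so gradients average over the window
        total_loss, n_batches = total_loss + loss.item(), n_batches + 1
        if i % ACCUMULATE_STEP == 0:  # one optimizer update, and one log line, per window
            optimizer.step()
            optimizer.zero_grad()
            print(f"{datetime.datetime.now()}: step: {i}/{len(dataloader)}, loss: {loss.item()}")
    return total_loss / max(n_batches, 1)  # the epoch-level "Loss:" summary is assumed to be this average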
******************************
Epoch: 1
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-23 22:17:31.776455: step: 4/77, loss: 0.12441090494394302
2023-01-23 22:17:33.040427: step: 8/77, loss: 0.09033513069152832
2023-01-23 22:17:34.292260: step: 12/77, loss: 0.07786523550748825
2023-01-23 22:17:35.577295: step: 16/77, loss: 0.13140356540679932
2023-01-23 22:17:36.802404: step: 20/77, loss: 0.07487793266773224
2023-01-23 22:17:38.111746: step: 24/77, loss: 0.11832275986671448
2023-01-23 22:17:39.461167: step: 28/77, loss: 0.09228794276714325
2023-01-23 22:17:40.805666: step: 32/77, loss: 0.0581531897187233
2023-01-23 22:17:42.060564: step: 36/77, loss: 0.08141757547855377
2023-01-23 22:17:43.370470: step: 40/77, loss: 0.06293760240077972
2023-01-23 22:17:44.702791: step: 44/77, loss: 0.027853157371282578
2023-01-23 22:17:45.984043: step: 48/77, loss: 0.1434049904346466
2023-01-23 22:17:47.287033: step: 52/77, loss: 0.11293593049049377
2023-01-23 22:17:48.554804: step: 56/77, loss: 0.04692786931991577
2023-01-23 22:17:49.858228: step: 60/77, loss: 0.053145136684179306
2023-01-23 22:17:51.132064: step: 64/77, loss: 0.05567377060651779
2023-01-23 22:17:52.417034: step: 68/77, loss: 0.03823421150445938
2023-01-23 22:17:53.714171: step: 72/77, loss: 0.1519671231508255
2023-01-23 22:17:54.973618: step: 76/77, loss: 0.13052865862846375
2023-01-23 22:17:56.287483: step: 80/77, loss: 0.15403302013874054
2023-01-23 22:17:57.592355: step: 84/77, loss: 0.15508079528808594
2023-01-23 22:17:58.925272: step: 88/77, loss: 0.06106165423989296
2023-01-23 22:18:00.227922: step: 92/77, loss: 0.09424936771392822
2023-01-23 22:18:01.533297: step: 96/77, loss: 0.18639744818210602
2023-01-23 22:18:02.836408: step: 100/77, loss: 0.15375545620918274
2023-01-23 22:18:04.160220: step: 104/77, loss: 0.08081331104040146
2023-01-23 22:18:05.469462: step: 108/77, loss: 0.12643709778785706
2023-01-23 22:18:06.752128: step: 112/77, loss: 0.06533145904541016
2023-01-23 22:18:08.071254: step: 116/77, loss: 0.0676630437374115
2023-01-23 22:18:09.416408: step: 120/77, loss: 0.15203221142292023
2023-01-23 22:18:10.691764: step: 124/77, loss: 0.12343025207519531
2023-01-23 22:18:12.046057: step: 128/77, loss: 0.06405288726091385
2023-01-23 22:18:13.334845: step: 132/77, loss: 0.026405390352010727
2023-01-23 22:18:14.641752: step: 136/77, loss: 0.12348470091819763
2023-01-23 22:18:15.935687: step: 140/77, loss: 0.05019168555736542
2023-01-23 22:18:17.261389: step: 144/77, loss: 0.06446860730648041
2023-01-23 22:18:18.520705: step: 148/77, loss: 0.06395231187343597
2023-01-23 22:18:19.843281: step: 152/77, loss: 0.09394072741270065
2023-01-23 22:18:21.181881: step: 156/77, loss: 0.08700048923492432
2023-01-23 22:18:22.488676: step: 160/77, loss: 0.16286557912826538
2023-01-23 22:18:23.784332: step: 164/77, loss: 0.16823582351207733
2023-01-23 22:18:25.107216: step: 168/77, loss: 0.14089858531951904
2023-01-23 22:18:26.398863: step: 172/77, loss: 0.1266268938779831
2023-01-23 22:18:27.706041: step: 176/77, loss: 0.09754317998886108
2023-01-23 22:18:29.007310: step: 180/77, loss: 0.04306968301534653
2023-01-23 22:18:30.300526: step: 184/77, loss: 0.17184460163116455
2023-01-23 22:18:31.626829: step: 188/77, loss: 0.17892251908779144
2023-01-23 22:18:32.893125: step: 192/77, loss: 0.11327420175075531
2023-01-23 22:18:34.197604: step: 196/77, loss: 0.08435102552175522
2023-01-23 22:18:35.490579: step: 200/77, loss: 0.11456001549959183
2023-01-23 22:18:36.785561: step: 204/77, loss: 0.09126681089401245
2023-01-23 22:18:38.043374: step: 208/77, loss: 0.12354177236557007
2023-01-23 22:18:39.311641: step: 212/77, loss: 0.08257673680782318
2023-01-23 22:18:40.589740: step: 216/77, loss: 0.1086714044213295
2023-01-23 22:18:41.882055: step: 220/77, loss: 0.07348623871803284
2023-01-23 22:18:43.159410: step: 224/77, loss: 0.16840782761573792
2023-01-23 22:18:44.495580: step: 228/77, loss: 0.06676463782787323
2023-01-23 22:18:45.742017: step: 232/77, loss: 0.07722212374210358
2023-01-23 22:18:46.999839: step: 236/77, loss: 0.12126356363296509
2023-01-23 22:18:48.262855: step: 240/77, loss: 0.1517094075679779
2023-01-23 22:18:49.580575: step: 244/77, loss: 0.08583660423755646
2023-01-23 22:18:50.896218: step: 248/77, loss: 0.06803622841835022
2023-01-23 22:18:52.192707: step: 252/77, loss: 0.1995316445827484
2023-01-23 22:18:53.456613: step: 256/77, loss: 0.08445043861865997
2023-01-23 22:18:54.782248: step: 260/77, loss: 0.09522661566734314
2023-01-23 22:18:56.061725: step: 264/77, loss: 0.11049655079841614
2023-01-23 22:18:57.317221: step: 268/77, loss: 0.11643015593290329
2023-01-23 22:18:58.623211: step: 272/77, loss: 0.07346449792385101
2023-01-23 22:18:59.945635: step: 276/77, loss: 0.22751890122890472
2023-01-23 22:19:01.217782: step: 280/77, loss: 0.10608585178852081
2023-01-23 22:19:02.536515: step: 284/77, loss: 0.1218462884426117
2023-01-23 22:19:03.819609: step: 288/77, loss: 0.30423903465270996
2023-01-23 22:19:05.155179: step: 292/77, loss: 0.12431395053863525
2023-01-23 22:19:06.438072: step: 296/77, loss: 0.0865350216627121
2023-01-23 22:19:07.698740: step: 300/77, loss: 0.28666821122169495
2023-01-23 22:19:08.983633: step: 304/77, loss: 0.1154976487159729
2023-01-23 22:19:10.275299: step: 308/77, loss: 0.05872436612844467
2023-01-23 22:19:11.534206: step: 312/77, loss: 0.0811275988817215
2023-01-23 22:19:12.823751: step: 316/77, loss: 0.07434535026550293
2023-01-23 22:19:14.126214: step: 320/77, loss: 0.0865451991558075
2023-01-23 22:19:15.424554: step: 324/77, loss: 0.08560467511415482
2023-01-23 22:19:16.758664: step: 328/77, loss: 0.26304253935813904
2023-01-23 22:19:18.045252: step: 332/77, loss: 0.11373617500066757
2023-01-23 22:19:19.320900: step: 336/77, loss: 0.15586160123348236
2023-01-23 22:19:20.633754: step: 340/77, loss: 0.16020292043685913
2023-01-23 22:19:21.925335: step: 344/77, loss: 0.14308831095695496
2023-01-23 22:19:23.214125: step: 348/77, loss: 0.2223663330078125
2023-01-23 22:19:24.515426: step: 352/77, loss: 0.19795863330364227
2023-01-23 22:19:25.796727: step: 356/77, loss: 0.17458972334861755
2023-01-23 22:19:27.088256: step: 360/77, loss: 0.16533160209655762
2023-01-23 22:19:28.400476: step: 364/77, loss: 0.09042729437351227
2023-01-23 22:19:29.724572: step: 368/77, loss: 0.14386004209518433
2023-01-23 22:19:31.024049: step: 372/77, loss: 0.10231050848960876
2023-01-23 22:19:32.329173: step: 376/77, loss: 0.12333185970783234
2023-01-23 22:19:33.594966: step: 380/77, loss: 0.031893059611320496
2023-01-23 22:19:34.900694: step: 384/77, loss: 0.18950426578521729
2023-01-23 22:19:36.202767: step: 388/77, loss: 0.10373860597610474
==================================================
Loss: 0.115
--------------------
Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
==================================================
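Epoch 0 printed "New best ... model" for all three languages even though every dev score was 0.0, while epoch 1 (also all zeros) prints no such line. That is consistent with first-result initialization followed by a strict greater-than comparison on the dev 'combined' score. The snippet below sketches that bookkeeping; all names are assumptions, and the actual selection rule in train.py may differ.

# Hypothetical tracking behind the "New best <language> model..." lines
# and the "Current best result" blocks; train.py's real rule may differ.
LANGUAGES = ("chinese", "korean", "russian")
best_dev = {lang: None for lang in LANGUAGES}

def maybe_update_best(lang: str, dev_scores: dict) -> bool:
    # dev_scores mirrors the dicts in this log, e.g.
    # {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {...}, 'combined': 0.0, 'epoch': 1}
    current = best_dev[lang]
    if current is None or dev_scores["combined"] > current["combined"]:
        best_dev[lang] = dev_scores
        print(f"New best {lang} model...")  # fires on epoch 0 (initialization), not on epoch 1's tie
        return True  # the caller would checkpoint the model here
    return False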
0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 2 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:21:19.477421: step: 4/77, loss: 0.1142747551202774 2023-01-23 22:21:20.791632: step: 8/77, loss: 0.05736310034990311 2023-01-23 22:21:22.102181: step: 12/77, loss: 0.07951190322637558 2023-01-23 22:21:23.383897: step: 16/77, loss: 0.26192837953567505 2023-01-23 22:21:24.685954: step: 20/77, loss: 0.059663981199264526 2023-01-23 22:21:25.968182: step: 24/77, loss: 0.13435494899749756 2023-01-23 22:21:27.256300: step: 28/77, loss: 0.10370631515979767 2023-01-23 22:21:28.536930: step: 32/77, loss: 0.08149582147598267 2023-01-23 22:21:29.881223: step: 36/77, loss: 0.05023789405822754 2023-01-23 22:21:31.203275: step: 40/77, loss: 0.1312832534313202 2023-01-23 22:21:32.530617: step: 44/77, loss: 0.03221912682056427 2023-01-23 22:21:33.829013: step: 48/77, loss: 0.11100511252880096 2023-01-23 22:21:35.127900: step: 52/77, loss: 0.11007451266050339 2023-01-23 22:21:36.393540: step: 56/77, loss: 0.032753244042396545 2023-01-23 22:21:37.632375: step: 60/77, loss: 0.12694334983825684 2023-01-23 22:21:38.922831: step: 64/77, loss: 0.07343435287475586 2023-01-23 22:21:40.219535: step: 68/77, loss: 0.06336362659931183 2023-01-23 22:21:41.516032: step: 72/77, loss: 0.041768234223127365 2023-01-23 22:21:42.774960: step: 76/77, loss: 0.056367091834545135 2023-01-23 22:21:44.090044: step: 80/77, loss: 0.08505905419588089 2023-01-23 22:21:45.378392: step: 84/77, loss: 0.07694367319345474 2023-01-23 22:21:46.672816: step: 88/77, loss: 0.08983577787876129 2023-01-23 22:21:47.949207: step: 92/77, loss: 0.053283900022506714 2023-01-23 22:21:49.265062: step: 96/77, loss: 0.10469195246696472 2023-01-23 22:21:50.535778: step: 100/77, loss: 0.06275144964456558 2023-01-23 22:21:51.813994: step: 104/77, loss: 0.06701089441776276 2023-01-23 22:21:53.094151: step: 108/77, loss: 0.04644807428121567 2023-01-23 22:21:54.409538: step: 112/77, loss: 0.0461655892431736 2023-01-23 22:21:55.698692: step: 116/77, loss: 0.05462607741355896 2023-01-23 22:21:57.006396: step: 120/77, loss: 0.04734814912080765 2023-01-23 22:21:58.283267: step: 124/77, loss: 0.08012720942497253 2023-01-23 22:21:59.599075: step: 128/77, loss: 0.05201762914657593 2023-01-23 22:22:00.907318: step: 132/77, loss: 0.10686588287353516 2023-01-23 22:22:02.239896: step: 136/77, loss: 0.12117096781730652 2023-01-23 22:22:03.509043: step: 140/77, loss: 0.03733343631029129 2023-01-23 22:22:04.791173: step: 
144/77, loss: 0.06947799026966095 2023-01-23 22:22:06.051955: step: 148/77, loss: 0.15459129214286804 2023-01-23 22:22:07.320958: step: 152/77, loss: 0.10610118508338928 2023-01-23 22:22:08.603491: step: 156/77, loss: 0.04381667077541351 2023-01-23 22:22:09.910247: step: 160/77, loss: 0.06606587767601013 2023-01-23 22:22:11.211043: step: 164/77, loss: 0.03466331586241722 2023-01-23 22:22:12.460338: step: 168/77, loss: 0.05533977597951889 2023-01-23 22:22:13.786152: step: 172/77, loss: 0.06665311753749847 2023-01-23 22:22:15.086248: step: 176/77, loss: 0.0230097696185112 2023-01-23 22:22:16.331491: step: 180/77, loss: 0.02612961269915104 2023-01-23 22:22:17.615587: step: 184/77, loss: 0.013719271868467331 2023-01-23 22:22:18.943468: step: 188/77, loss: 0.09981541335582733 2023-01-23 22:22:20.247682: step: 192/77, loss: 0.02909611165523529 2023-01-23 22:22:21.546542: step: 196/77, loss: 0.025418300181627274 2023-01-23 22:22:22.871749: step: 200/77, loss: 0.04251161217689514 2023-01-23 22:22:24.199621: step: 204/77, loss: 0.01297570951282978 2023-01-23 22:22:25.515017: step: 208/77, loss: 0.2978173494338989 2023-01-23 22:22:26.780567: step: 212/77, loss: 0.014417173340916634 2023-01-23 22:22:28.082480: step: 216/77, loss: 0.052583567798137665 2023-01-23 22:22:29.371495: step: 220/77, loss: 0.028254462406039238 2023-01-23 22:22:30.639194: step: 224/77, loss: 0.10017166286706924 2023-01-23 22:22:31.987739: step: 228/77, loss: 0.04614071175456047 2023-01-23 22:22:33.261223: step: 232/77, loss: 0.03228935971856117 2023-01-23 22:22:34.531297: step: 236/77, loss: 0.03745909035205841 2023-01-23 22:22:35.838577: step: 240/77, loss: 0.08055315166711807 2023-01-23 22:22:37.143843: step: 244/77, loss: 0.011832380667328835 2023-01-23 22:22:38.459712: step: 248/77, loss: 0.11246861517429352 2023-01-23 22:22:39.760546: step: 252/77, loss: 0.006759069859981537 2023-01-23 22:22:41.104779: step: 256/77, loss: 0.07749151438474655 2023-01-23 22:22:42.481517: step: 260/77, loss: 0.09028497338294983 2023-01-23 22:22:43.755034: step: 264/77, loss: 0.0039024720899760723 2023-01-23 22:22:45.037284: step: 268/77, loss: 0.08940325677394867 2023-01-23 22:22:46.319143: step: 272/77, loss: 0.0029816448222845793 2023-01-23 22:22:47.612883: step: 276/77, loss: 0.24304865300655365 2023-01-23 22:22:48.902055: step: 280/77, loss: 0.021988455206155777 2023-01-23 22:22:50.223202: step: 284/77, loss: 0.03558812662959099 2023-01-23 22:22:51.446050: step: 288/77, loss: 0.008235489949584007 2023-01-23 22:22:52.715887: step: 292/77, loss: 0.03035302273929119 2023-01-23 22:22:54.029815: step: 296/77, loss: 0.025868304073810577 2023-01-23 22:22:55.347838: step: 300/77, loss: 0.06794444471597672 2023-01-23 22:22:56.659025: step: 304/77, loss: 0.09457609057426453 2023-01-23 22:22:57.994384: step: 308/77, loss: 0.17274029552936554 2023-01-23 22:22:59.317532: step: 312/77, loss: 0.02986432984471321 2023-01-23 22:23:00.666420: step: 316/77, loss: 0.006221502088010311 2023-01-23 22:23:01.969772: step: 320/77, loss: 0.04203524440526962 2023-01-23 22:23:03.256608: step: 324/77, loss: 0.0495450533926487 2023-01-23 22:23:04.615222: step: 328/77, loss: 0.11815431714057922 2023-01-23 22:23:05.892086: step: 332/77, loss: 0.07679884135723114 2023-01-23 22:23:07.178165: step: 336/77, loss: 0.02332843467593193 2023-01-23 22:23:08.491354: step: 340/77, loss: 0.035125378519296646 2023-01-23 22:23:09.780668: step: 344/77, loss: 0.18532869219779968 2023-01-23 22:23:11.109914: step: 348/77, loss: 0.017119672149419785 2023-01-23 22:23:12.413286: step: 
352/77, loss: 0.04413990676403046 2023-01-23 22:23:13.690037: step: 356/77, loss: 0.024440914392471313 2023-01-23 22:23:15.006222: step: 360/77, loss: 0.02238747477531433 2023-01-23 22:23:16.322378: step: 364/77, loss: 0.12076471745967865 2023-01-23 22:23:17.600014: step: 368/77, loss: 0.05480184406042099 2023-01-23 22:23:18.902480: step: 372/77, loss: 0.03360671550035477 2023-01-23 22:23:20.227985: step: 376/77, loss: 0.04755230247974396 2023-01-23 22:23:21.509379: step: 380/77, loss: 0.08242542296648026 2023-01-23 22:23:22.802884: step: 384/77, loss: 0.03073291853070259 2023-01-23 22:23:24.098788: step: 388/77, loss: 0.023550812155008316 ================================================== Loss: 0.068 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} New best chinese model... New best korean model... New best russian model... 
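Each evaluation block above reports precision (p), recall (r), and F1 for template and slot matching, plus a combined score. Throughout this log the combined value is consistent with the product of the template F1 and the slot F1, and each f1 is the usual harmonic mean of p and r. A minimal sketch of that arithmetic, checked against the epoch-2 Dev Chinese figures (the function name is illustrative, not taken from train.py):

    def f1(p, r):
        # Harmonic mean of precision and recall; taken as 0.0 when p + r == 0.
        return 2 * p * r / (p + r) if p + r > 0 else 0.0

    # Epoch-2 Dev Chinese figures from the block above.
    template_f1 = f1(1.0, 0.5833333333333334)   # -> 0.7368421052631579
    slot_f1 = f1(0.5, 0.03780718336483932)      # -> 0.07029876977152899
    combined = template_f1 * slot_f1            # -> 0.05179909351586346
    print(template_f1, slot_f1, combined)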
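The loss lines advance in increments of four (step: 4/77, 8/77, ..., 388/77), which lines up with --accumulate_step 4: one log line per accumulation window of four batch_size-10 micro-batches. Note that the /77 denominator evidently tracks a different count than the logged step index, since the index runs well past 77. A gradient-accumulation sketch under those assumptions (the model, optimizer, loss, and data below are toy stand-ins, not train.py's objects):

    import torch

    # Toy stand-ins for train.py's model, optimizer, and data loader.
    model = torch.nn.Linear(8, 1)
    optimizer = torch.optim.Adam(model.parameters(), lr=2e-4)
    batches = [torch.randn(10, 8) for _ in range(12)]  # batch_size 10

    accumulate_step = 4
    for i, batch in enumerate(batches, start=1):
        loss = model(batch).pow(2).mean()      # toy loss, not train.py's
        (loss / accumulate_step).backward()    # accumulate scaled gradients
        if i % accumulate_step == 0:           # every 4th micro-batch...
            optimizer.step()                   # ...one optimizer update
            optimizer.zero_grad()
            print(f"step: {i}, loss: {loss.item():.6f}")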
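The "New best ... model..." lines and the "Current best result" block that follows behave like a per-language tracker keyed by the dev combined score, with strict improvement required: epoch 2 displaces the epoch-0 zeros, while the tied dev scores of epochs 3-5 and the lower epoch-6 score leave epoch 2 in place. A hypothetical sketch of that bookkeeping (names are not from train.py):

    # Hypothetical per-language bookkeeping; train.py's structure may differ.
    best = {lang: {"combined": 0.0, "epoch": 0}
            for lang in ("chinese", "korean", "russian")}

    def update_best(lang, dev_combined, epoch):
        # Strict improvement: ties (epochs 3-5 above) keep the earlier best.
        if dev_combined > best[lang]["combined"]:
            best[lang] = {"combined": dev_combined, "epoch": epoch}
            print(f"New best {lang} model...")

    update_best("chinese", 0.0, 1)                  # epoch 1: all zeros, silent
    update_best("chinese", 0.05179909351586346, 2)  # epoch 2: prints the log line
    update_best("chinese", 0.05179909351586346, 3)  # epoch 3: tie, silent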
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:25:26.812572: step: 4/77, loss: 0.10117167234420776 2023-01-23 22:25:28.149502: step: 8/77, loss: 0.035268642008304596 2023-01-23 22:25:29.435028: step: 12/77, loss: 0.020470503717660904 2023-01-23 22:25:30.675135: step: 16/77, loss: 0.0317903496325016 2023-01-23 22:25:31.994647: step: 20/77, loss: 0.03285245597362518 2023-01-23 22:25:33.265041: step: 24/77, loss: 0.013518502935767174 2023-01-23 22:25:34.554781: step: 28/77, loss: 0.04890953376889229 2023-01-23 22:25:35.813274: step: 32/77, loss: 0.007277567870914936 2023-01-23 22:25:37.076178: step: 36/77, loss: 0.035050973296165466 2023-01-23 22:25:38.350571: step: 40/77, loss: 0.005168437957763672 2023-01-23 22:25:39.592836: step: 44/77, loss: 0.09427545219659805 2023-01-23 22:25:40.893809: step: 48/77, loss: 0.007487798575311899 2023-01-23 22:25:42.177426: step: 52/77, loss: 0.13489431142807007 2023-01-23 22:25:43.486901: step: 56/77, loss: 0.018846577033400536 2023-01-23 22:25:44.745666: step: 60/77, loss: 0.06260690093040466 2023-01-23 22:25:46.038353: step: 64/77, loss: 0.03510448336601257 2023-01-23 22:25:47.317051: step: 68/77, loss: 0.09224671125411987 2023-01-23 22:25:48.577410: step: 72/77, loss: 0.006170531734824181 2023-01-23 22:25:49.916302: step: 76/77, loss: 0.04142601788043976 2023-01-23 22:25:51.246096: step: 80/77, 
loss: 0.00428326241672039 2023-01-23 22:25:52.560836: step: 84/77, loss: 0.04080601781606674 2023-01-23 22:25:53.829472: step: 88/77, loss: 0.014709645882248878 2023-01-23 22:25:55.140549: step: 92/77, loss: 0.02892473340034485 2023-01-23 22:25:56.443552: step: 96/77, loss: 0.027689656242728233 2023-01-23 22:25:57.729433: step: 100/77, loss: 0.029068466275930405 2023-01-23 22:25:59.059180: step: 104/77, loss: 0.08021325618028641 2023-01-23 22:26:00.372311: step: 108/77, loss: 0.0033673420548439026 2023-01-23 22:26:01.627282: step: 112/77, loss: 0.06603340059518814 2023-01-23 22:26:02.935921: step: 116/77, loss: 0.03336016461253166 2023-01-23 22:26:04.230116: step: 120/77, loss: 0.044526346027851105 2023-01-23 22:26:05.485129: step: 124/77, loss: 0.007119000889360905 2023-01-23 22:26:06.765998: step: 128/77, loss: 0.0037483470514416695 2023-01-23 22:26:08.021038: step: 132/77, loss: 0.01118344534188509 2023-01-23 22:26:09.262894: step: 136/77, loss: 0.004519638139754534 2023-01-23 22:26:10.578243: step: 140/77, loss: 0.011617088690400124 2023-01-23 22:26:11.858032: step: 144/77, loss: 0.022709239274263382 2023-01-23 22:26:13.155224: step: 148/77, loss: 0.009147140197455883 2023-01-23 22:26:14.429080: step: 152/77, loss: 0.07406341284513474 2023-01-23 22:26:15.682882: step: 156/77, loss: 0.017505250871181488 2023-01-23 22:26:16.997938: step: 160/77, loss: 0.045731909573078156 2023-01-23 22:26:18.267072: step: 164/77, loss: 0.026252295821905136 2023-01-23 22:26:19.616588: step: 168/77, loss: 0.10950756072998047 2023-01-23 22:26:20.915912: step: 172/77, loss: 0.037779469043016434 2023-01-23 22:26:22.222743: step: 176/77, loss: 0.12713013589382172 2023-01-23 22:26:23.537892: step: 180/77, loss: 0.07886983454227448 2023-01-23 22:26:24.846724: step: 184/77, loss: 0.04936029389500618 2023-01-23 22:26:26.106263: step: 188/77, loss: 0.033952511847019196 2023-01-23 22:26:27.395471: step: 192/77, loss: 0.09324926137924194 2023-01-23 22:26:28.700985: step: 196/77, loss: 0.07248904556035995 2023-01-23 22:26:30.002307: step: 200/77, loss: 0.3481594920158386 2023-01-23 22:26:31.268802: step: 204/77, loss: 0.0296194888651371 2023-01-23 22:26:32.545174: step: 208/77, loss: 0.009488312527537346 2023-01-23 22:26:33.856832: step: 212/77, loss: 0.01296425424516201 2023-01-23 22:26:35.224777: step: 216/77, loss: 0.010238438844680786 2023-01-23 22:26:36.482596: step: 220/77, loss: 0.09828265756368637 2023-01-23 22:26:37.802424: step: 224/77, loss: 0.047479234635829926 2023-01-23 22:26:39.084646: step: 228/77, loss: 0.015161667950451374 2023-01-23 22:26:40.385722: step: 232/77, loss: 0.07021617889404297 2023-01-23 22:26:41.725345: step: 236/77, loss: 0.04781772941350937 2023-01-23 22:26:43.030747: step: 240/77, loss: 0.010120526887476444 2023-01-23 22:26:44.323398: step: 244/77, loss: 0.11225083470344543 2023-01-23 22:26:45.653735: step: 248/77, loss: 0.04302593320608139 2023-01-23 22:26:47.001591: step: 252/77, loss: 0.007975922897458076 2023-01-23 22:26:48.284973: step: 256/77, loss: 0.03689086437225342 2023-01-23 22:26:49.567686: step: 260/77, loss: 0.022007912397384644 2023-01-23 22:26:50.873577: step: 264/77, loss: 0.06469503790140152 2023-01-23 22:26:52.144709: step: 268/77, loss: 0.010310238227248192 2023-01-23 22:26:53.419080: step: 272/77, loss: 0.060053229331970215 2023-01-23 22:26:54.719774: step: 276/77, loss: 0.015747323632240295 2023-01-23 22:26:55.992909: step: 280/77, loss: 0.011841855943202972 2023-01-23 22:26:57.235962: step: 284/77, loss: 0.02313528209924698 2023-01-23 22:26:58.505111: step: 
288/77, loss: 0.03175218030810356 2023-01-23 22:26:59.757772: step: 292/77, loss: 0.05135905742645264 2023-01-23 22:27:01.066170: step: 296/77, loss: 0.015323103405535221 2023-01-23 22:27:02.387600: step: 300/77, loss: 0.023951048031449318 2023-01-23 22:27:03.706105: step: 304/77, loss: 0.04787027835845947 2023-01-23 22:27:05.043229: step: 308/77, loss: 0.006300767883658409 2023-01-23 22:27:06.352283: step: 312/77, loss: 0.06664685904979706 2023-01-23 22:27:07.621672: step: 316/77, loss: 0.02057635597884655 2023-01-23 22:27:08.944391: step: 320/77, loss: 0.033022478222846985 2023-01-23 22:27:10.236820: step: 324/77, loss: 0.010527991689741611 2023-01-23 22:27:11.558780: step: 328/77, loss: 0.032395221292972565 2023-01-23 22:27:12.850572: step: 332/77, loss: 0.05708228796720505 2023-01-23 22:27:14.154865: step: 336/77, loss: 0.05209455266594887 2023-01-23 22:27:15.487944: step: 340/77, loss: 0.17037370800971985 2023-01-23 22:27:16.804581: step: 344/77, loss: 0.022650867700576782 2023-01-23 22:27:18.158429: step: 348/77, loss: 0.0066597433760762215 2023-01-23 22:27:19.493793: step: 352/77, loss: 0.009481480345129967 2023-01-23 22:27:20.802184: step: 356/77, loss: 0.024885304272174835 2023-01-23 22:27:22.116319: step: 360/77, loss: 0.004146650433540344 2023-01-23 22:27:23.489176: step: 364/77, loss: 0.04929664731025696 2023-01-23 22:27:24.817734: step: 368/77, loss: 0.024423057213425636 2023-01-23 22:27:26.141704: step: 372/77, loss: 0.024516455829143524 2023-01-23 22:27:27.461545: step: 376/77, loss: 0.02542997896671295 2023-01-23 22:27:28.749283: step: 380/77, loss: 0.02705446630716324 2023-01-23 22:27:29.948757: step: 384/77, loss: 0.009648672305047512 2023-01-23 22:27:31.273949: step: 388/77, loss: 0.021449856460094452 ================================================== Loss: 0.041 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Chinese: {'template': {'p': 0.9661016949152542, 'r': 0.44881889763779526, 'f1': 0.6129032258064516}, 'slot': {'p': 0.5925925925925926, 'r': 0.01384083044982699, 'f1': 0.02704987320371936}, 'combined': 0.016578954544215093, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Korean: {'template': {'p': 0.9672131147540983, 'r': 0.4645669291338583, 'f1': 0.6276595744680851}, 'slot': {'p': 0.6153846153846154, 'r': 0.01384083044982699, 'f1': 0.027072758037225045}, 'combined': 0.0169924757893221, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Russian: {'template': {'p': 0.9661016949152542, 'r': 0.44881889763779526, 'f1': 0.6129032258064516}, 'slot': {'p': 0.6296296296296297, 'r': 0.014705882352941176, 'f1': 0.028740490278951817}, 'combined': 0.017615139203228533, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 
0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 4 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:29:14.895846: step: 4/77, loss: 0.004307045601308346 2023-01-23 22:29:16.163654: step: 8/77, loss: 0.019689451903104782 2023-01-23 22:29:17.474474: step: 12/77, loss: 0.035611364990472794 2023-01-23 22:29:18.750653: step: 16/77, loss: 0.09661786258220673 2023-01-23 22:29:20.098488: step: 20/77, loss: 0.016645941883325577 2023-01-23 22:29:21.391609: step: 24/77, loss: 0.06498975306749344 2023-01-23 22:29:22.675226: step: 28/77, loss: 0.07612696290016174 2023-01-23 22:29:23.993705: step: 32/77, loss: 0.02551441453397274 2023-01-23 22:29:25.283901: step: 36/77, loss: 0.03179791569709778 2023-01-23 22:29:26.641830: step: 40/77, loss: 0.007174788508564234 2023-01-23 22:29:27.933247: step: 44/77, loss: 0.024703051894903183 2023-01-23 22:29:29.221563: step: 48/77, loss: 0.021204259246587753 2023-01-23 22:29:30.504499: step: 52/77, loss: 0.021670710295438766 2023-01-23 22:29:31.831668: step: 56/77, loss: 0.08658331632614136 2023-01-23 22:29:33.094288: step: 60/77, loss: 0.035944074392318726 2023-01-23 22:29:34.445626: step: 64/77, loss: 0.00285487063229084 2023-01-23 22:29:35.773231: step: 68/77, loss: 0.044412776827812195 2023-01-23 22:29:37.097334: 
step: 72/77, loss: 0.01924610510468483 2023-01-23 22:29:38.400056: step: 76/77, loss: 0.00307654682546854 2023-01-23 22:29:39.699761: step: 80/77, loss: 0.010064997710287571 2023-01-23 22:29:41.039811: step: 84/77, loss: 0.008352034725248814 2023-01-23 22:29:42.366631: step: 88/77, loss: 0.08968818187713623 2023-01-23 22:29:43.694384: step: 92/77, loss: 0.009886187501251698 2023-01-23 22:29:44.938655: step: 96/77, loss: 0.001971887657418847 2023-01-23 22:29:46.222169: step: 100/77, loss: 0.04601195082068443 2023-01-23 22:29:47.534520: step: 104/77, loss: 0.09782662242650986 2023-01-23 22:29:48.838027: step: 108/77, loss: 0.025908637791872025 2023-01-23 22:29:50.136197: step: 112/77, loss: 0.016505222767591476 2023-01-23 22:29:51.391430: step: 116/77, loss: 0.024203073233366013 2023-01-23 22:29:52.629683: step: 120/77, loss: 0.060282886028289795 2023-01-23 22:29:53.890777: step: 124/77, loss: 0.021418243646621704 2023-01-23 22:29:55.190171: step: 128/77, loss: 0.21600335836410522 2023-01-23 22:29:56.501917: step: 132/77, loss: 0.025364048779010773 2023-01-23 22:29:57.763873: step: 136/77, loss: 0.018149809911847115 2023-01-23 22:29:59.063670: step: 140/77, loss: 0.011929758824408054 2023-01-23 22:30:00.367777: step: 144/77, loss: 0.046187981963157654 2023-01-23 22:30:01.651278: step: 148/77, loss: 0.020687207579612732 2023-01-23 22:30:02.938750: step: 152/77, loss: 0.0076691824942827225 2023-01-23 22:30:04.152279: step: 156/77, loss: 0.01612572930753231 2023-01-23 22:30:05.467237: step: 160/77, loss: 0.005361623130738735 2023-01-23 22:30:06.744114: step: 164/77, loss: 0.013921466656029224 2023-01-23 22:30:08.042478: step: 168/77, loss: 0.014753730036318302 2023-01-23 22:30:09.326560: step: 172/77, loss: 0.006856784224510193 2023-01-23 22:30:10.632625: step: 176/77, loss: 0.006791251711547375 2023-01-23 22:30:11.912882: step: 180/77, loss: 0.023701781406998634 2023-01-23 22:30:13.214444: step: 184/77, loss: 0.003863999620079994 2023-01-23 22:30:14.506432: step: 188/77, loss: 0.08335664868354797 2023-01-23 22:30:15.849348: step: 192/77, loss: 0.0314469076693058 2023-01-23 22:30:17.135377: step: 196/77, loss: 0.028623215854167938 2023-01-23 22:30:18.459380: step: 200/77, loss: 0.016699109226465225 2023-01-23 22:30:19.825345: step: 204/77, loss: 0.20456035435199738 2023-01-23 22:30:21.160438: step: 208/77, loss: 0.02883007377386093 2023-01-23 22:30:22.473397: step: 212/77, loss: 0.019007431343197823 2023-01-23 22:30:23.770903: step: 216/77, loss: 0.012269491329789162 2023-01-23 22:30:25.096205: step: 220/77, loss: 0.018181482329964638 2023-01-23 22:30:26.369858: step: 224/77, loss: 0.006761718541383743 2023-01-23 22:30:27.668003: step: 228/77, loss: 0.09358687698841095 2023-01-23 22:30:28.963660: step: 232/77, loss: 0.015363413840532303 2023-01-23 22:30:30.264677: step: 236/77, loss: 0.12969405949115753 2023-01-23 22:30:31.539582: step: 240/77, loss: 0.013874266296625137 2023-01-23 22:30:32.837379: step: 244/77, loss: 0.01584259234368801 2023-01-23 22:30:34.106177: step: 248/77, loss: 0.014385403133928776 2023-01-23 22:30:35.445556: step: 252/77, loss: 0.0032219949644058943 2023-01-23 22:30:36.749685: step: 256/77, loss: 0.006406520493328571 2023-01-23 22:30:38.065667: step: 260/77, loss: 0.022642673924565315 2023-01-23 22:30:39.374632: step: 264/77, loss: 0.021770788356661797 2023-01-23 22:30:40.660354: step: 268/77, loss: 0.13582871854305267 2023-01-23 22:30:41.978504: step: 272/77, loss: 0.016680482774972916 2023-01-23 22:30:43.324631: step: 276/77, loss: 0.02853316254913807 2023-01-23 
22:30:44.601463: step: 280/77, loss: 0.01124812476336956 2023-01-23 22:30:45.930555: step: 284/77, loss: 0.038071952760219574 2023-01-23 22:30:47.188874: step: 288/77, loss: 0.016053292900323868 2023-01-23 22:30:48.508666: step: 292/77, loss: 0.02302248775959015 2023-01-23 22:30:49.861691: step: 296/77, loss: 0.02152237854897976 2023-01-23 22:30:51.154856: step: 300/77, loss: 0.004397551063448191 2023-01-23 22:30:52.453520: step: 304/77, loss: 0.05093690752983093 2023-01-23 22:30:53.749799: step: 308/77, loss: 0.02377844601869583 2023-01-23 22:30:55.096784: step: 312/77, loss: 0.06021858751773834 2023-01-23 22:30:56.392955: step: 316/77, loss: 0.26077935099601746 2023-01-23 22:30:57.736905: step: 320/77, loss: 0.012799538671970367 2023-01-23 22:30:59.066835: step: 324/77, loss: 0.013254077173769474 2023-01-23 22:31:00.356087: step: 328/77, loss: 0.0028235381469130516 2023-01-23 22:31:01.618695: step: 332/77, loss: 0.004842108115553856 2023-01-23 22:31:02.936511: step: 336/77, loss: 0.03993977606296539 2023-01-23 22:31:04.302052: step: 340/77, loss: 0.02504277601838112 2023-01-23 22:31:05.576861: step: 344/77, loss: 0.02159244380891323 2023-01-23 22:31:06.865756: step: 348/77, loss: 0.025638697668910027 2023-01-23 22:31:08.163715: step: 352/77, loss: 0.04387909173965454 2023-01-23 22:31:09.479956: step: 356/77, loss: 0.09395100176334381 2023-01-23 22:31:10.812943: step: 360/77, loss: 0.06930564343929291 2023-01-23 22:31:12.073242: step: 364/77, loss: 0.08788342028856277 2023-01-23 22:31:13.395804: step: 368/77, loss: 0.033105265349149704 2023-01-23 22:31:14.754917: step: 372/77, loss: 0.06653378158807755 2023-01-23 22:31:16.099401: step: 376/77, loss: 0.008012992329895496 2023-01-23 22:31:17.384432: step: 380/77, loss: 0.018076276406645775 2023-01-23 22:31:18.682263: step: 384/77, loss: 0.031726736575365067 2023-01-23 22:31:19.988795: step: 388/77, loss: 0.07097798585891724 ================================================== Loss: 0.037 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Chinese: {'template': {'p': 0.9821428571428571, 'r': 0.4330708661417323, 'f1': 0.6010928961748634}, 'slot': {'p': 0.6818181818181818, 'r': 0.012975778546712802, 'f1': 0.025466893039049233}, 'combined': 0.015307968493417572, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Korean: {'template': {'p': 0.9821428571428571, 'r': 0.4330708661417323, 'f1': 0.6010928961748634}, 'slot': {'p': 0.6818181818181818, 'r': 0.012975778546712802, 'f1': 0.025466893039049233}, 'combined': 0.015307968493417572, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Russian: {'template': {'p': 0.9821428571428571, 'r': 0.4330708661417323, 'f1': 0.6010928961748634}, 'slot': {'p': 0.6818181818181818, 'r': 0.012975778546712802, 'f1': 0.025466893039049233}, 'combined': 0.015307968493417572, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} Sample Korean: {'template': 
{'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 5 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:33:03.483651: step: 4/77, loss: 0.01851249672472477 2023-01-23 22:33:04.801937: step: 8/77, loss: 0.0035652555525302887 2023-01-23 22:33:06.107297: step: 12/77, loss: 0.007702311966568232 2023-01-23 22:33:07.369141: step: 16/77, loss: 0.01385408453643322 2023-01-23 22:33:08.666246: step: 20/77, loss: 0.026695922017097473 2023-01-23 22:33:09.933128: step: 24/77, loss: 0.012424022890627384 2023-01-23 22:33:11.256854: step: 28/77, loss: 0.007287586107850075 2023-01-23 22:33:12.566946: step: 32/77, loss: 0.037353646010160446 2023-01-23 22:33:13.852235: step: 36/77, loss: 0.018462033942341805 2023-01-23 22:33:15.163795: step: 40/77, loss: 0.016039032489061356 2023-01-23 22:33:16.448290: step: 44/77, loss: 0.06680099666118622 2023-01-23 22:33:17.748880: step: 48/77, loss: 0.006608352065086365 2023-01-23 22:33:19.063327: step: 52/77, loss: 0.10271197557449341 2023-01-23 22:33:20.382291: step: 56/77, loss: 0.017130937427282333 2023-01-23 22:33:21.721980: step: 60/77, loss: 0.01108469720929861 
2023-01-23 22:33:23.027039: step: 64/77, loss: 0.048407115042209625 2023-01-23 22:33:24.288363: step: 68/77, loss: 0.03218407183885574 2023-01-23 22:33:25.585088: step: 72/77, loss: 0.0029523978009819984 2023-01-23 22:33:26.830967: step: 76/77, loss: 0.03068627417087555 2023-01-23 22:33:28.165187: step: 80/77, loss: 0.002274099737405777 2023-01-23 22:33:29.455364: step: 84/77, loss: 0.01065951120108366 2023-01-23 22:33:30.738890: step: 88/77, loss: 0.0039505185559391975 2023-01-23 22:33:32.003415: step: 92/77, loss: 0.03132602944970131 2023-01-23 22:33:33.309903: step: 96/77, loss: 0.047224149107933044 2023-01-23 22:33:34.654882: step: 100/77, loss: 0.037667229771614075 2023-01-23 22:33:35.950161: step: 104/77, loss: 0.030513830482959747 2023-01-23 22:33:37.278265: step: 108/77, loss: 0.02144569344818592 2023-01-23 22:33:38.544128: step: 112/77, loss: 0.008346985094249249 2023-01-23 22:33:39.872656: step: 116/77, loss: 0.03922979533672333 2023-01-23 22:33:41.195316: step: 120/77, loss: 0.0027566482312977314 2023-01-23 22:33:42.448341: step: 124/77, loss: 0.004905262961983681 2023-01-23 22:33:43.747978: step: 128/77, loss: 0.007458773907274008 2023-01-23 22:33:45.002583: step: 132/77, loss: 0.026997998356819153 2023-01-23 22:33:46.270559: step: 136/77, loss: 0.021584073081612587 2023-01-23 22:33:47.571655: step: 140/77, loss: 0.013400858268141747 2023-01-23 22:33:48.877018: step: 144/77, loss: 0.021433046087622643 2023-01-23 22:33:50.168305: step: 148/77, loss: 0.025518041104078293 2023-01-23 22:33:51.467767: step: 152/77, loss: 0.050987645983695984 2023-01-23 22:33:52.748918: step: 156/77, loss: 0.010998288169503212 2023-01-23 22:33:54.036913: step: 160/77, loss: 0.046618834137916565 2023-01-23 22:33:55.374508: step: 164/77, loss: 0.06384487450122833 2023-01-23 22:33:56.659408: step: 168/77, loss: 0.016236823052167892 2023-01-23 22:33:57.975963: step: 172/77, loss: 0.031272441148757935 2023-01-23 22:33:59.285126: step: 176/77, loss: 0.036344870924949646 2023-01-23 22:34:00.617287: step: 180/77, loss: 0.02237529307603836 2023-01-23 22:34:01.954073: step: 184/77, loss: 0.034511446952819824 2023-01-23 22:34:03.227919: step: 188/77, loss: 0.038873665034770966 2023-01-23 22:34:04.514395: step: 192/77, loss: 0.03066616877913475 2023-01-23 22:34:05.821134: step: 196/77, loss: 0.0007045451202429831 2023-01-23 22:34:07.154360: step: 200/77, loss: 0.06683935970067978 2023-01-23 22:34:08.455534: step: 204/77, loss: 0.030061164870858192 2023-01-23 22:34:09.741529: step: 208/77, loss: 0.05396779626607895 2023-01-23 22:34:11.048905: step: 212/77, loss: 0.012820908799767494 2023-01-23 22:34:12.358002: step: 216/77, loss: 0.009320125915110111 2023-01-23 22:34:13.695813: step: 220/77, loss: 0.0747046247124672 2023-01-23 22:34:14.981641: step: 224/77, loss: 0.009744771756231785 2023-01-23 22:34:16.283763: step: 228/77, loss: 0.004006761126220226 2023-01-23 22:34:17.572842: step: 232/77, loss: 0.013168903067708015 2023-01-23 22:34:18.855693: step: 236/77, loss: 0.07993464171886444 2023-01-23 22:34:20.153864: step: 240/77, loss: 0.03175496682524681 2023-01-23 22:34:21.455327: step: 244/77, loss: 0.023099705576896667 2023-01-23 22:34:22.762022: step: 248/77, loss: 0.0038555117789655924 2023-01-23 22:34:24.038037: step: 252/77, loss: 0.04137454554438591 2023-01-23 22:34:25.341296: step: 256/77, loss: 0.002440269570797682 2023-01-23 22:34:26.658382: step: 260/77, loss: 0.007243657484650612 2023-01-23 22:34:27.954833: step: 264/77, loss: 0.03407406806945801 2023-01-23 22:34:29.258619: step: 268/77, loss: 
0.04122215881943703 2023-01-23 22:34:30.576791: step: 272/77, loss: 0.037244781851768494 2023-01-23 22:34:31.843999: step: 276/77, loss: 0.04572469741106033 2023-01-23 22:34:33.129595: step: 280/77, loss: 0.022644642740488052 2023-01-23 22:34:34.472138: step: 284/77, loss: 0.0011615646071732044 2023-01-23 22:34:35.772065: step: 288/77, loss: 0.014798080548644066 2023-01-23 22:34:37.055391: step: 292/77, loss: 0.023985128849744797 2023-01-23 22:34:38.371209: step: 296/77, loss: 0.02150266245007515 2023-01-23 22:34:39.702293: step: 300/77, loss: 0.007881814613938332 2023-01-23 22:34:40.994364: step: 304/77, loss: 0.031038884073495865 2023-01-23 22:34:42.278801: step: 308/77, loss: 0.004278877750039101 2023-01-23 22:34:43.565148: step: 312/77, loss: 0.007023588288575411 2023-01-23 22:34:44.840539: step: 316/77, loss: 0.05231161415576935 2023-01-23 22:34:46.170752: step: 320/77, loss: 0.06902115792036057 2023-01-23 22:34:47.485364: step: 324/77, loss: 0.012647450901567936 2023-01-23 22:34:48.765286: step: 328/77, loss: 0.07448658347129822 2023-01-23 22:34:50.143211: step: 332/77, loss: 0.018296927213668823 2023-01-23 22:34:51.449024: step: 336/77, loss: 0.04754916578531265 2023-01-23 22:34:52.751760: step: 340/77, loss: 0.23820245265960693 2023-01-23 22:34:54.039196: step: 344/77, loss: 0.02240445651113987 2023-01-23 22:34:55.300970: step: 348/77, loss: 0.047524698078632355 2023-01-23 22:34:56.589817: step: 352/77, loss: 0.04264065623283386 2023-01-23 22:34:57.865352: step: 356/77, loss: 0.07267670333385468 2023-01-23 22:34:59.214563: step: 360/77, loss: 0.02015545964241028 2023-01-23 22:35:00.486459: step: 364/77, loss: 0.04709238559007645 2023-01-23 22:35:01.831851: step: 368/77, loss: 0.09137171506881714 2023-01-23 22:35:03.115014: step: 372/77, loss: 0.05773291736841202 2023-01-23 22:35:04.421738: step: 376/77, loss: 0.02109794318675995 2023-01-23 22:35:05.769195: step: 380/77, loss: 0.02238389477133751 2023-01-23 22:35:07.105248: step: 384/77, loss: 0.04301886260509491 2023-01-23 22:35:08.473324: step: 388/77, loss: 0.01773441582918167 ================================================== Loss: 0.031 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Chinese: {'template': {'p': 0.9466666666666667, 'r': 0.5590551181102362, 'f1': 0.7029702970297029}, 'slot': {'p': 0.7307692307692307, 'r': 0.01643598615916955, 'f1': 0.032148900169204735}, 'combined': 0.02259972190112412, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Korean: {'template': {'p': 0.9594594594594594, 'r': 0.5590551181102362, 'f1': 0.7064676616915422}, 'slot': {'p': 0.7307692307692307, 'r': 0.01643598615916955, 'f1': 0.032148900169204735}, 'combined': 0.022712158328492892, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Russian: {'template': {'p': 0.9342105263157895, 'r': 0.5590551181102362, 'f1': 0.6995073891625616}, 'slot': {'p': 0.7407407407407407, 'r': 0.01730103806228374, 'f1': 0.03381234150464919}, 'combined': 0.023651982727390077, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 
0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 6 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:36:51.795504: step: 4/77, loss: 0.019876815378665924 2023-01-23 22:36:53.064441: step: 8/77, loss: 0.006573471240699291 2023-01-23 22:36:54.345261: step: 12/77, loss: 0.015288086608052254 2023-01-23 22:36:55.601911: step: 16/77, loss: 0.03685459867119789 2023-01-23 22:36:56.878915: step: 20/77, loss: 0.025068249553442 2023-01-23 22:36:58.201616: step: 24/77, loss: 0.004917709622532129 2023-01-23 22:36:59.482691: step: 28/77, loss: 0.041336797177791595 2023-01-23 22:37:00.756551: step: 32/77, loss: 0.015547524206340313 2023-01-23 22:37:02.063237: step: 36/77, loss: 0.022921713069081306 2023-01-23 22:37:03.365967: step: 40/77, loss: 0.0036199199967086315 2023-01-23 22:37:04.686589: step: 44/77, loss: 0.0027790761087089777 2023-01-23 22:37:05.967321: step: 48/77, loss: 0.011076805181801319 2023-01-23 22:37:07.239033: step: 52/77, 
loss: 0.00021039244893472642 2023-01-23 22:37:08.582109: step: 56/77, loss: 0.002944371895864606 2023-01-23 22:37:09.885897: step: 60/77, loss: 0.025461489334702492 2023-01-23 22:37:11.177086: step: 64/77, loss: 0.04321763664484024 2023-01-23 22:37:12.483094: step: 68/77, loss: 0.005923053249716759 2023-01-23 22:37:13.808870: step: 72/77, loss: 0.0004399519821163267 2023-01-23 22:37:15.136813: step: 76/77, loss: 0.010212014429271221 2023-01-23 22:37:16.459688: step: 80/77, loss: 0.0259318295866251 2023-01-23 22:37:17.719547: step: 84/77, loss: 0.022111227735877037 2023-01-23 22:37:19.005149: step: 88/77, loss: 0.023291099816560745 2023-01-23 22:37:20.320512: step: 92/77, loss: 0.018929187208414078 2023-01-23 22:37:21.602876: step: 96/77, loss: 0.014732517302036285 2023-01-23 22:37:22.848646: step: 100/77, loss: 0.02497703768312931 2023-01-23 22:37:24.163586: step: 104/77, loss: 0.068994902074337 2023-01-23 22:37:25.428900: step: 108/77, loss: 0.02583613246679306 2023-01-23 22:37:26.726601: step: 112/77, loss: 0.05988915264606476 2023-01-23 22:37:27.993735: step: 116/77, loss: 0.060604531317949295 2023-01-23 22:37:29.272183: step: 120/77, loss: 0.020236942917108536 2023-01-23 22:37:30.603777: step: 124/77, loss: 0.049723561853170395 2023-01-23 22:37:31.877553: step: 128/77, loss: 0.008909706957638264 2023-01-23 22:37:33.148898: step: 132/77, loss: 0.0024141171015799046 2023-01-23 22:37:34.403158: step: 136/77, loss: 0.02390531823039055 2023-01-23 22:37:35.673149: step: 140/77, loss: 0.057607464492321014 2023-01-23 22:37:36.975790: step: 144/77, loss: 0.00355730508454144 2023-01-23 22:37:38.261005: step: 148/77, loss: 0.010880752466619015 2023-01-23 22:37:39.578530: step: 152/77, loss: 0.016689520329236984 2023-01-23 22:37:40.899614: step: 156/77, loss: 0.0038037316408008337 2023-01-23 22:37:42.209017: step: 160/77, loss: 0.017490610480308533 2023-01-23 22:37:43.479766: step: 164/77, loss: 0.0006976961740292609 2023-01-23 22:37:44.753420: step: 168/77, loss: 0.008576530031859875 2023-01-23 22:37:46.070981: step: 172/77, loss: 0.054237280040979385 2023-01-23 22:37:47.379489: step: 176/77, loss: 0.025560835376381874 2023-01-23 22:37:48.685367: step: 180/77, loss: 0.02417994663119316 2023-01-23 22:37:50.010400: step: 184/77, loss: 0.05693662539124489 2023-01-23 22:37:51.331737: step: 188/77, loss: 0.0032692819368094206 2023-01-23 22:37:52.615054: step: 192/77, loss: 0.008004766888916492 2023-01-23 22:37:53.947789: step: 196/77, loss: 0.06952568143606186 2023-01-23 22:37:55.259828: step: 200/77, loss: 0.019822662696242332 2023-01-23 22:37:56.552378: step: 204/77, loss: 0.05271018296480179 2023-01-23 22:37:57.839649: step: 208/77, loss: 0.012663454748690128 2023-01-23 22:37:59.149678: step: 212/77, loss: 0.007480953354388475 2023-01-23 22:38:00.426409: step: 216/77, loss: 0.07426632195711136 2023-01-23 22:38:01.716070: step: 220/77, loss: 0.021370479837059975 2023-01-23 22:38:03.000869: step: 224/77, loss: 0.003257451578974724 2023-01-23 22:38:04.285413: step: 228/77, loss: 0.010057792067527771 2023-01-23 22:38:05.612325: step: 232/77, loss: 0.06914263963699341 2023-01-23 22:38:06.924782: step: 236/77, loss: 0.0067864772863686085 2023-01-23 22:38:08.255960: step: 240/77, loss: 0.05100144073367119 2023-01-23 22:38:09.546443: step: 244/77, loss: 0.04917469248175621 2023-01-23 22:38:10.861826: step: 248/77, loss: 0.03630446270108223 2023-01-23 22:38:12.186985: step: 252/77, loss: 0.0772506445646286 2023-01-23 22:38:13.456196: step: 256/77, loss: 0.014203687198460102 2023-01-23 22:38:14.743143: step: 
260/77, loss: 0.053320031613111496 2023-01-23 22:38:16.087915: step: 264/77, loss: 0.015590444207191467 2023-01-23 22:38:17.459348: step: 268/77, loss: 0.01844686083495617 2023-01-23 22:38:18.768254: step: 272/77, loss: 0.04268677532672882 2023-01-23 22:38:20.104399: step: 276/77, loss: 0.023348961025476456 2023-01-23 22:38:21.424941: step: 280/77, loss: 0.013826183043420315 2023-01-23 22:38:22.670528: step: 284/77, loss: 0.014793286100029945 2023-01-23 22:38:23.968506: step: 288/77, loss: 0.009313910268247128 2023-01-23 22:38:25.287322: step: 292/77, loss: 0.023486994206905365 2023-01-23 22:38:26.553319: step: 296/77, loss: 0.022837920114398003 2023-01-23 22:38:27.842983: step: 300/77, loss: 0.006142630707472563 2023-01-23 22:38:29.147255: step: 304/77, loss: 0.014211054891347885 2023-01-23 22:38:30.443419: step: 308/77, loss: 0.03037683293223381 2023-01-23 22:38:31.729185: step: 312/77, loss: 0.014047280885279179 2023-01-23 22:38:33.091096: step: 316/77, loss: 0.01892756298184395 2023-01-23 22:38:34.407604: step: 320/77, loss: 0.009444999508559704 2023-01-23 22:38:35.739924: step: 324/77, loss: 0.002001096960157156 2023-01-23 22:38:37.022024: step: 328/77, loss: 0.03141818940639496 2023-01-23 22:38:38.295330: step: 332/77, loss: 0.006161955185234547 2023-01-23 22:38:39.545301: step: 336/77, loss: 0.004971682094037533 2023-01-23 22:38:40.819947: step: 340/77, loss: 0.00327905360609293 2023-01-23 22:38:42.140006: step: 344/77, loss: 0.02420627512037754 2023-01-23 22:38:43.466311: step: 348/77, loss: 0.0510546937584877 2023-01-23 22:38:44.810405: step: 352/77, loss: 0.0859951302409172 2023-01-23 22:38:46.194127: step: 356/77, loss: 0.004114494659006596 2023-01-23 22:38:47.509551: step: 360/77, loss: 0.05849332734942436 2023-01-23 22:38:48.808365: step: 364/77, loss: 0.012778722681105137 2023-01-23 22:38:50.143002: step: 368/77, loss: 0.0038407668471336365 2023-01-23 22:38:51.452713: step: 372/77, loss: 0.0127643421292305 2023-01-23 22:38:52.773604: step: 376/77, loss: 0.013638246804475784 2023-01-23 22:38:54.072571: step: 380/77, loss: 0.03323771804571152 2023-01-23 22:38:55.401882: step: 384/77, loss: 0.0005775393219664693 2023-01-23 22:38:56.677036: step: 388/77, loss: 0.00024180466425605118 ================================================== Loss: 0.024 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 6} Test Chinese: {'template': {'p': 0.9594594594594594, 'r': 0.5590551181102362, 'f1': 0.7064676616915422}, 'slot': {'p': 0.6363636363636364, 'r': 0.018166089965397925, 'f1': 0.03532380151387721}, 'combined': 0.02495512345756499, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 6} Test Korean: {'template': {'p': 0.96, 'r': 0.5669291338582677, 'f1': 0.712871287128713}, 'slot': {'p': 0.6285714285714286, 'r': 0.01903114186851211, 'f1': 0.036943744752308987}, 'combined': 0.026336134872933143, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 6} Test Russian: {'template': {'p': 0.9466666666666667, 'r': 0.5590551181102362, 'f1': 0.7029702970297029}, 'slot': {'p': 0.6176470588235294, 'r': 
0.018166089965397925, 'f1': 0.03529411764705882}, 'combined': 0.024810716365754222, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 7 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:40:40.435360: step: 4/77, loss: 0.014091260731220245 2023-01-23 22:40:41.708296: step: 8/77, loss: 0.000623593048658222 2023-01-23 22:40:43.029173: step: 12/77, loss: 0.0031894927378743887 2023-01-23 22:40:44.321959: step: 16/77, loss: 0.0008812348823994398 2023-01-23 22:40:45.635396: step: 20/77, loss: 0.014165188185870647 2023-01-23 22:40:46.941266: step: 24/77, loss: 0.01300637237727642 2023-01-23 22:40:48.251467: step: 28/77, loss: 0.003656288841739297 2023-01-23 22:40:49.540815: step: 32/77, loss: 0.004824704956263304 2023-01-23 22:40:50.800034: step: 36/77, loss: 0.0013270446797832847 2023-01-23 22:40:52.099906: step: 40/77, loss: 0.006385405547916889 2023-01-23 22:40:53.354509: step: 44/77, loss: 0.005329822190105915 2023-01-23 
22:40:54.651430: step: 48/77, loss: 0.001831568544730544 2023-01-23 22:40:55.956328: step: 52/77, loss: 0.038037534803152084 2023-01-23 22:40:57.216934: step: 56/77, loss: 0.009928743354976177 2023-01-23 22:40:58.513905: step: 60/77, loss: 0.01838652975857258 2023-01-23 22:40:59.766649: step: 64/77, loss: 0.02379920333623886 2023-01-23 22:41:01.073043: step: 68/77, loss: 0.035091448575258255 2023-01-23 22:41:02.401235: step: 72/77, loss: 0.004036274738609791 2023-01-23 22:41:03.722630: step: 76/77, loss: 0.008674428798258305 2023-01-23 22:41:04.957054: step: 80/77, loss: 0.001725957146845758 2023-01-23 22:41:06.238500: step: 84/77, loss: 0.0015108429361134768 2023-01-23 22:41:07.566796: step: 88/77, loss: 0.012778017669916153 2023-01-23 22:41:08.883375: step: 92/77, loss: 0.02824033610522747 2023-01-23 22:41:10.152197: step: 96/77, loss: 0.000985867576673627 2023-01-23 22:41:11.444406: step: 100/77, loss: 0.028359783813357353 2023-01-23 22:41:12.757166: step: 104/77, loss: 0.004324179142713547 2023-01-23 22:41:14.116920: step: 108/77, loss: 0.27105656266212463 2023-01-23 22:41:15.403113: step: 112/77, loss: 0.019737599417567253 2023-01-23 22:41:16.707475: step: 116/77, loss: 0.009240414947271347 2023-01-23 22:41:18.063005: step: 120/77, loss: 0.004861234221607447 2023-01-23 22:41:19.374101: step: 124/77, loss: 0.03176651522517204 2023-01-23 22:41:20.718524: step: 128/77, loss: 0.0288337804377079 2023-01-23 22:41:22.020532: step: 132/77, loss: 0.007038864307105541 2023-01-23 22:41:23.311427: step: 136/77, loss: 0.012824473902583122 2023-01-23 22:41:24.581122: step: 140/77, loss: 0.006008431315422058 2023-01-23 22:41:25.900165: step: 144/77, loss: 0.057693202048540115 2023-01-23 22:41:27.182180: step: 148/77, loss: 0.025265760719776154 2023-01-23 22:41:28.477836: step: 152/77, loss: 0.006553241517394781 2023-01-23 22:41:29.780750: step: 156/77, loss: 0.004011549986898899 2023-01-23 22:41:31.097334: step: 160/77, loss: 0.009259817190468311 2023-01-23 22:41:32.385575: step: 164/77, loss: 0.005273655988276005 2023-01-23 22:41:33.664612: step: 168/77, loss: 0.06386923044919968 2023-01-23 22:41:34.916488: step: 172/77, loss: 0.021036511287093163 2023-01-23 22:41:36.220470: step: 176/77, loss: 0.01947185769677162 2023-01-23 22:41:37.543202: step: 180/77, loss: 0.0021108086220920086 2023-01-23 22:41:38.822502: step: 184/77, loss: 0.0008717880118638277 2023-01-23 22:41:40.132793: step: 188/77, loss: 0.003025084501132369 2023-01-23 22:41:41.450559: step: 192/77, loss: 0.021636178717017174 2023-01-23 22:41:42.751470: step: 196/77, loss: 0.018800625577569008 2023-01-23 22:41:44.014683: step: 200/77, loss: 0.017199119552969933 2023-01-23 22:41:45.335722: step: 204/77, loss: 0.04579906910657883 2023-01-23 22:41:46.624617: step: 208/77, loss: 0.04112239554524422 2023-01-23 22:41:47.903086: step: 212/77, loss: 0.017853224650025368 2023-01-23 22:41:49.181318: step: 216/77, loss: 0.01161262672394514 2023-01-23 22:41:50.509622: step: 220/77, loss: 0.01685524359345436 2023-01-23 22:41:51.813217: step: 224/77, loss: 0.0001876596943475306 2023-01-23 22:41:53.103509: step: 228/77, loss: 0.007752103731036186 2023-01-23 22:41:54.363620: step: 232/77, loss: 0.012243285775184631 2023-01-23 22:41:55.729658: step: 236/77, loss: 0.06392502039670944 2023-01-23 22:41:57.070000: step: 240/77, loss: 0.03169390186667442 2023-01-23 22:41:58.362127: step: 244/77, loss: 0.05966990441083908 2023-01-23 22:41:59.690381: step: 248/77, loss: 0.0021885111927986145 2023-01-23 22:42:00.991245: step: 252/77, loss: 0.16490457952022552 
2023-01-23 22:42:02.280412: step: 256/77, loss: 0.027331221848726273 2023-01-23 22:42:03.578762: step: 260/77, loss: 0.019304146990180016 2023-01-23 22:42:04.854834: step: 264/77, loss: 0.0014638921711593866 2023-01-23 22:42:06.158667: step: 268/77, loss: 0.04890373349189758 2023-01-23 22:42:07.442107: step: 272/77, loss: 0.0009773285128176212 2023-01-23 22:42:08.728408: step: 276/77, loss: 0.0015155820874497294 2023-01-23 22:42:10.061024: step: 280/77, loss: 0.00379112153314054 2023-01-23 22:42:11.325752: step: 284/77, loss: 0.03754507750272751 2023-01-23 22:42:12.599421: step: 288/77, loss: 0.021101878955960274 2023-01-23 22:42:13.900103: step: 292/77, loss: 0.019474411383271217 2023-01-23 22:42:15.221156: step: 296/77, loss: 0.018521897494792938 2023-01-23 22:42:16.508172: step: 300/77, loss: 0.001744049252010882 2023-01-23 22:42:17.781817: step: 304/77, loss: 0.006619489751756191 2023-01-23 22:42:19.073617: step: 308/77, loss: 0.03244337439537048 2023-01-23 22:42:20.406269: step: 312/77, loss: 0.0007960422663018107 2023-01-23 22:42:21.724803: step: 316/77, loss: 0.010220680385828018 2023-01-23 22:42:23.064317: step: 320/77, loss: 0.01762489601969719 2023-01-23 22:42:24.383150: step: 324/77, loss: 0.14098475873470306 2023-01-23 22:42:25.700874: step: 328/77, loss: 0.00601299898698926 2023-01-23 22:42:27.013788: step: 332/77, loss: 0.012527402490377426 2023-01-23 22:42:28.330215: step: 336/77, loss: 0.000528876087628305 2023-01-23 22:42:29.623495: step: 340/77, loss: 0.0026341930497437716 2023-01-23 22:42:30.910189: step: 344/77, loss: 0.05650842562317848 2023-01-23 22:42:32.180160: step: 348/77, loss: 0.014022696763277054 2023-01-23 22:42:33.485951: step: 352/77, loss: 0.0021680789068341255 2023-01-23 22:42:34.787096: step: 356/77, loss: 8.263064228231087e-05 2023-01-23 22:42:36.061255: step: 360/77, loss: 0.020330294966697693 2023-01-23 22:42:37.395697: step: 364/77, loss: 0.04933106154203415 2023-01-23 22:42:38.695698: step: 368/77, loss: 0.001341789378784597 2023-01-23 22:42:39.954586: step: 372/77, loss: 0.002500483300536871 2023-01-23 22:42:41.239073: step: 376/77, loss: 0.041689809411764145 2023-01-23 22:42:42.567591: step: 380/77, loss: 0.023196659982204437 2023-01-23 22:42:43.840114: step: 384/77, loss: 0.1326698660850525 2023-01-23 22:42:45.172402: step: 388/77, loss: 0.04675935208797455 ================================================== Loss: 0.023 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Chinese: {'template': {'p': 0.9466666666666667, 'r': 0.5590551181102362, 'f1': 0.7029702970297029}, 'slot': {'p': 0.6470588235294118, 'r': 0.01903114186851211, 'f1': 0.03697478991596639}, 'combined': 0.025992179049837756, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Korean: {'template': {'p': 0.9473684210526315, 'r': 0.5669291338582677, 'f1': 0.70935960591133}, 'slot': {'p': 0.6388888888888888, 'r': 0.019896193771626297, 'f1': 0.03859060402684564}, 'combined': 0.027374615664363406, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Russian: {'template': {'p': 
0.9466666666666667, 'r': 0.5590551181102362, 'f1': 0.7029702970297029}, 'slot': {'p': 0.6470588235294118, 'r': 0.01903114186851211, 'f1': 0.03697478991596639}, 'combined': 0.025992179049837756, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 8 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:44:28.504291: step: 4/77, loss: 0.03909171372652054 2023-01-23 22:44:29.805056: step: 8/77, loss: 0.04083281010389328 2023-01-23 22:44:31.087816: step: 12/77, loss: 0.02047703228890896 2023-01-23 22:44:32.382590: step: 16/77, loss: 0.028162088245153427 2023-01-23 22:44:33.712825: step: 20/77, loss: 0.019167069345712662 2023-01-23 22:44:34.980595: step: 24/77, loss: 0.030149690806865692 2023-01-23 22:44:36.285459: step: 28/77, loss: 0.04303986579179764 2023-01-23 22:44:37.610070: step: 32/77, loss: 0.0117753054946661 2023-01-23 22:44:38.917772: step: 36/77, loss: 0.005166348069906235 2023-01-23 22:44:40.207841: step: 40/77, loss: 
0.0857258141040802 2023-01-23 22:44:41.486850: step: 44/77, loss: 0.00024584803031757474 2023-01-23 22:44:42.832621: step: 48/77, loss: 0.011664533987641335 2023-01-23 22:44:44.070481: step: 52/77, loss: 0.034976307302713394 2023-01-23 22:44:45.356161: step: 56/77, loss: 0.006636979524046183 2023-01-23 22:44:46.623194: step: 60/77, loss: 0.05367900803685188 2023-01-23 22:44:47.921467: step: 64/77, loss: 0.048779040575027466 2023-01-23 22:44:49.261812: step: 68/77, loss: 0.0183818731456995 2023-01-23 22:44:50.578131: step: 72/77, loss: 0.010400182567536831 2023-01-23 22:44:51.848367: step: 76/77, loss: 0.05637083947658539 2023-01-23 22:44:53.099031: step: 80/77, loss: 0.0005344225792214274 2023-01-23 22:44:54.399772: step: 84/77, loss: 0.00033000862458720803 2023-01-23 22:44:55.680457: step: 88/77, loss: 0.018841277807950974 2023-01-23 22:44:56.972868: step: 92/77, loss: 0.023982657119631767 2023-01-23 22:44:58.233200: step: 96/77, loss: 0.005666761659085751 2023-01-23 22:44:59.502580: step: 100/77, loss: 0.05443336069583893 2023-01-23 22:45:00.815034: step: 104/77, loss: 0.0061767855659127235 2023-01-23 22:45:02.122615: step: 108/77, loss: 0.008791591040790081 2023-01-23 22:45:03.401317: step: 112/77, loss: 0.01809808798134327 2023-01-23 22:45:04.678629: step: 116/77, loss: 0.03698926791548729 2023-01-23 22:45:05.957267: step: 120/77, loss: 0.02199491485953331 2023-01-23 22:45:07.200530: step: 124/77, loss: 0.002461447613313794 2023-01-23 22:45:08.468109: step: 128/77, loss: 0.013487190008163452 2023-01-23 22:45:09.695155: step: 132/77, loss: 0.028719313442707062 2023-01-23 22:45:11.008124: step: 136/77, loss: 0.07887116074562073 2023-01-23 22:45:12.324380: step: 140/77, loss: 0.06149774789810181 2023-01-23 22:45:13.604821: step: 144/77, loss: 0.005725775845348835 2023-01-23 22:45:14.890067: step: 148/77, loss: 0.007735730614513159 2023-01-23 22:45:16.189725: step: 152/77, loss: 0.013402965851128101 2023-01-23 22:45:17.528174: step: 156/77, loss: 0.0026370673440396786 2023-01-23 22:45:18.807588: step: 160/77, loss: 0.0006408776971511543 2023-01-23 22:45:20.132643: step: 164/77, loss: 0.015061311423778534 2023-01-23 22:45:21.441020: step: 168/77, loss: 0.009904321283102036 2023-01-23 22:45:22.760755: step: 172/77, loss: 0.013677509501576424 2023-01-23 22:45:24.088976: step: 176/77, loss: 0.06900405138731003 2023-01-23 22:45:25.386040: step: 180/77, loss: 0.018023595213890076 2023-01-23 22:45:26.685833: step: 184/77, loss: 0.031008142977952957 2023-01-23 22:45:27.976798: step: 188/77, loss: 0.03302126005291939 2023-01-23 22:45:29.253250: step: 192/77, loss: 0.007039917167276144 2023-01-23 22:45:30.550551: step: 196/77, loss: 0.002231169492006302 2023-01-23 22:45:31.848533: step: 200/77, loss: 0.008826125413179398 2023-01-23 22:45:33.179165: step: 204/77, loss: 0.016067208722233772 2023-01-23 22:45:34.482018: step: 208/77, loss: 0.0906267911195755 2023-01-23 22:45:35.768709: step: 212/77, loss: 0.003105991752818227 2023-01-23 22:45:37.094773: step: 216/77, loss: 0.03075207956135273 2023-01-23 22:45:38.432046: step: 220/77, loss: 0.04361942410469055 2023-01-23 22:45:39.722400: step: 224/77, loss: 0.010997829958796501 2023-01-23 22:45:41.018632: step: 228/77, loss: 0.01769120618700981 2023-01-23 22:45:42.338716: step: 232/77, loss: 0.008495952002704144 2023-01-23 22:45:43.636068: step: 236/77, loss: 0.0071330564096570015 2023-01-23 22:45:44.926307: step: 240/77, loss: 0.00010605436546029523 2023-01-23 22:45:46.240122: step: 244/77, loss: 0.016715597361326218 2023-01-23 22:45:47.530964: step: 
248/77, loss: 0.007385910488665104 2023-01-23 22:45:48.794064: step: 252/77, loss: 0.01896490901708603 2023-01-23 22:45:50.090529: step: 256/77, loss: 0.039543408900499344 2023-01-23 22:45:51.396329: step: 260/77, loss: 0.014666267670691013 2023-01-23 22:45:52.725940: step: 264/77, loss: 0.012767759151756763 2023-01-23 22:45:54.045034: step: 268/77, loss: 0.011936817318201065 2023-01-23 22:45:55.319104: step: 272/77, loss: 0.005859396420419216 2023-01-23 22:45:56.587435: step: 276/77, loss: 0.0017303384374827147 2023-01-23 22:45:57.864486: step: 280/77, loss: 0.05166729539632797 2023-01-23 22:45:59.175326: step: 284/77, loss: 0.028724398463964462 2023-01-23 22:46:00.473582: step: 288/77, loss: 0.027749208733439445 2023-01-23 22:46:01.731713: step: 292/77, loss: 0.008368278853595257 2023-01-23 22:46:03.024658: step: 296/77, loss: 0.013057144358754158 2023-01-23 22:46:04.366975: step: 300/77, loss: 0.011596021242439747 2023-01-23 22:46:05.687681: step: 304/77, loss: 0.0006313698249869049 2023-01-23 22:46:06.965564: step: 308/77, loss: 0.052643369883298874 2023-01-23 22:46:08.220655: step: 312/77, loss: 0.010507049039006233 2023-01-23 22:46:09.513781: step: 316/77, loss: 0.01833794265985489 2023-01-23 22:46:10.815558: step: 320/77, loss: 0.007980125956237316 2023-01-23 22:46:12.091863: step: 324/77, loss: 0.006608211435377598 2023-01-23 22:46:13.328893: step: 328/77, loss: 0.02376813068985939 2023-01-23 22:46:14.644144: step: 332/77, loss: 6.894932448631153e-05 2023-01-23 22:46:15.898999: step: 336/77, loss: 0.011127547360956669 2023-01-23 22:46:17.221809: step: 340/77, loss: 0.009277567267417908 2023-01-23 22:46:18.507370: step: 344/77, loss: 0.04166052117943764 2023-01-23 22:46:19.859873: step: 348/77, loss: 0.046544525772333145 2023-01-23 22:46:21.131197: step: 352/77, loss: 0.0034268973395228386 2023-01-23 22:46:22.416396: step: 356/77, loss: 0.030889064073562622 2023-01-23 22:46:23.723065: step: 360/77, loss: 0.047395166009664536 2023-01-23 22:46:25.004875: step: 364/77, loss: 0.009107564575970173 2023-01-23 22:46:26.322696: step: 368/77, loss: 0.02338031865656376 2023-01-23 22:46:27.592359: step: 372/77, loss: 0.04273316636681557 2023-01-23 22:46:28.852839: step: 376/77, loss: 0.017735697329044342 2023-01-23 22:46:30.164466: step: 380/77, loss: 5.373685416998342e-05 2023-01-23 22:46:31.448671: step: 384/77, loss: 3.152326462441124e-05 2023-01-23 22:46:32.746922: step: 388/77, loss: 0.02466980367898941 ================================================== Loss: 0.022 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Chinese: {'template': {'p': 0.9324324324324325, 'r': 0.5433070866141733, 'f1': 0.6865671641791046}, 'slot': {'p': 0.6388888888888888, 'r': 0.019896193771626297, 'f1': 0.03859060402684564}, 'combined': 0.026495041570670144, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Korean: {'template': {'p': 0.918918918918919, 'r': 0.5354330708661418, 'f1': 0.6766169154228856}, 'slot': {'p': 0.6052631578947368, 'r': 0.019896193771626297, 'f1': 0.038525963149078725}, 'combined': 0.026067318349625408, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Russian: {'template': {'p': 0.9315068493150684, 'r': 0.5354330708661418, 'f1': 0.6799999999999999}, 'slot': {'p': 0.6216216216216216, 'r': 0.019896193771626297, 'f1': 0.03855825649622799}, 'combined': 0.02621961441743503, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 9 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:48:13.106863: step: 4/77, loss: 0.004985678009688854 2023-01-23 22:48:14.405978: step: 8/77, loss: 3.863613983412506e-06 2023-01-23 22:48:15.687706: step: 12/77, loss: 0.0014464558335021138 2023-01-23 22:48:16.928174: step: 16/77, loss: 0.005553169641643763 2023-01-23 22:48:18.240244: step: 20/77, loss: 0.0022822213359177113 2023-01-23 22:48:19.541259: step: 24/77, loss: 0.01146892923861742 2023-01-23 22:48:20.817444: step: 28/77, loss: 
0.005904552526772022 2023-01-23 22:48:22.110017: step: 32/77, loss: 0.09429851174354553 2023-01-23 22:48:23.400834: step: 36/77, loss: 0.0034883248154073954 2023-01-23 22:48:24.744367: step: 40/77, loss: 0.04095402732491493 2023-01-23 22:48:26.041175: step: 44/77, loss: 0.001694257603958249 2023-01-23 22:48:27.319103: step: 48/77, loss: 0.030682552605867386 2023-01-23 22:48:28.610314: step: 52/77, loss: 0.003378424560651183 2023-01-23 22:48:29.922635: step: 56/77, loss: 0.02623351290822029 2023-01-23 22:48:31.240098: step: 60/77, loss: 0.017934948205947876 2023-01-23 22:48:32.486804: step: 64/77, loss: 0.004223274532705545 2023-01-23 22:48:33.764415: step: 68/77, loss: 0.002698099473491311 2023-01-23 22:48:35.065625: step: 72/77, loss: 0.00402219919487834 2023-01-23 22:48:36.356502: step: 76/77, loss: 0.013955757021903992 2023-01-23 22:48:37.701549: step: 80/77, loss: 0.04232095927000046 2023-01-23 22:48:38.930983: step: 84/77, loss: 0.024030989035964012 2023-01-23 22:48:40.215509: step: 88/77, loss: 0.017762908712029457 2023-01-23 22:48:41.516741: step: 92/77, loss: 0.008113443851470947 2023-01-23 22:48:42.773366: step: 96/77, loss: 0.005497557111084461 2023-01-23 22:48:44.051013: step: 100/77, loss: 0.014417735859751701 2023-01-23 22:48:45.360937: step: 104/77, loss: 0.03320337459445 2023-01-23 22:48:46.695828: step: 108/77, loss: 0.06536999344825745 2023-01-23 22:48:47.939529: step: 112/77, loss: 0.006031247787177563 2023-01-23 22:48:49.255061: step: 116/77, loss: 0.011752675287425518 2023-01-23 22:48:50.529704: step: 120/77, loss: 0.002265932969748974 2023-01-23 22:48:51.808473: step: 124/77, loss: 0.03896312415599823 2023-01-23 22:48:53.093098: step: 128/77, loss: 0.02578730881214142 2023-01-23 22:48:54.394312: step: 132/77, loss: 0.013655617833137512 2023-01-23 22:48:55.701729: step: 136/77, loss: 0.0001007022219710052 2023-01-23 22:48:56.973350: step: 140/77, loss: 0.02868984267115593 2023-01-23 22:48:58.328027: step: 144/77, loss: 0.0007975643966346979 2023-01-23 22:48:59.609877: step: 148/77, loss: 0.000653879891615361 2023-01-23 22:49:00.937601: step: 152/77, loss: 0.0015032761730253696 2023-01-23 22:49:02.235638: step: 156/77, loss: 0.007357191760092974 2023-01-23 22:49:03.514782: step: 160/77, loss: 0.010839682072401047 2023-01-23 22:49:04.799890: step: 164/77, loss: 0.008317798376083374 2023-01-23 22:49:06.123270: step: 168/77, loss: 0.00936589390039444 2023-01-23 22:49:07.441436: step: 172/77, loss: 0.007547073066234589 2023-01-23 22:49:08.739349: step: 176/77, loss: 0.005637632217258215 2023-01-23 22:49:10.011671: step: 180/77, loss: 0.010725868865847588 2023-01-23 22:49:11.277924: step: 184/77, loss: 0.027186449617147446 2023-01-23 22:49:12.582478: step: 188/77, loss: 0.004381542094051838 2023-01-23 22:49:13.879960: step: 192/77, loss: 0.017316747456789017 2023-01-23 22:49:15.155290: step: 196/77, loss: 0.009244048036634922 2023-01-23 22:49:16.469296: step: 200/77, loss: 0.011271592229604721 2023-01-23 22:49:17.804944: step: 204/77, loss: 0.023467810824513435 2023-01-23 22:49:19.135502: step: 208/77, loss: 0.01169417891651392 2023-01-23 22:49:20.401108: step: 212/77, loss: 0.018470294773578644 2023-01-23 22:49:21.677666: step: 216/77, loss: 0.0076330117881298065 2023-01-23 22:49:22.965778: step: 220/77, loss: 0.04853637143969536 2023-01-23 22:49:24.264203: step: 224/77, loss: 0.026607543230056763 2023-01-23 22:49:25.585408: step: 228/77, loss: 0.013537176884710789 2023-01-23 22:49:26.820105: step: 232/77, loss: 0.02130974270403385 2023-01-23 22:49:28.099075: step: 236/77, 
loss: 0.026897411793470383 2023-01-23 22:49:29.384458: step: 240/77, loss: 8.246965444413945e-05 2023-01-23 22:49:30.617782: step: 244/77, loss: 0.03548573702573776 2023-01-23 22:49:31.889605: step: 248/77, loss: 0.01436652708798647 2023-01-23 22:49:33.197841: step: 252/77, loss: 0.00015419725968968123 2023-01-23 22:49:34.530767: step: 256/77, loss: 0.030067767947912216 2023-01-23 22:49:35.831558: step: 260/77, loss: 0.03384627774357796 2023-01-23 22:49:37.089511: step: 264/77, loss: 0.009953079745173454 2023-01-23 22:49:38.347794: step: 268/77, loss: 0.08683308959007263 2023-01-23 22:49:39.659939: step: 272/77, loss: 0.05754532665014267 2023-01-23 22:49:40.921231: step: 276/77, loss: 0.001794222043827176 2023-01-23 22:49:42.217725: step: 280/77, loss: 0.011959494091570377 2023-01-23 22:49:43.526018: step: 284/77, loss: 0.013683785684406757 2023-01-23 22:49:44.825774: step: 288/77, loss: 0.0354127436876297 2023-01-23 22:49:46.117548: step: 292/77, loss: 0.00022566976258531213 2023-01-23 22:49:47.383239: step: 296/77, loss: 0.05970189720392227 2023-01-23 22:49:48.621664: step: 300/77, loss: 0.001040084520354867 2023-01-23 22:49:49.952260: step: 304/77, loss: 0.0011714230058714747 2023-01-23 22:49:51.232341: step: 308/77, loss: 0.0047460198402404785 2023-01-23 22:49:52.501226: step: 312/77, loss: 0.02429712750017643 2023-01-23 22:49:53.781658: step: 316/77, loss: 0.026674669235944748 2023-01-23 22:49:55.085628: step: 320/77, loss: 0.010548888705670834 2023-01-23 22:49:56.349107: step: 324/77, loss: 0.006413971073925495 2023-01-23 22:49:57.617509: step: 328/77, loss: 0.004589063581079245 2023-01-23 22:49:58.836754: step: 332/77, loss: 0.06987367570400238 2023-01-23 22:50:00.169191: step: 336/77, loss: 0.0022110692225396633 2023-01-23 22:50:01.421846: step: 340/77, loss: 0.017331529408693314 2023-01-23 22:50:02.716847: step: 344/77, loss: 0.010062351822853088 2023-01-23 22:50:04.000495: step: 348/77, loss: 0.031465794891119 2023-01-23 22:50:05.306768: step: 352/77, loss: 0.02157551422715187 2023-01-23 22:50:06.616214: step: 356/77, loss: 0.02133113332092762 2023-01-23 22:50:07.920271: step: 360/77, loss: 0.008301690220832825 2023-01-23 22:50:09.184648: step: 364/77, loss: 0.09201312810182571 2023-01-23 22:50:10.479428: step: 368/77, loss: 0.006819041445851326 2023-01-23 22:50:11.752303: step: 372/77, loss: 0.006157260853797197 2023-01-23 22:50:13.047680: step: 376/77, loss: 0.0027986792847514153 2023-01-23 22:50:14.331233: step: 380/77, loss: 0.003340411465615034 2023-01-23 22:50:15.611085: step: 384/77, loss: 0.026231326162815094 2023-01-23 22:50:16.906376: step: 388/77, loss: 0.006379453465342522 ================================================== Loss: 0.018 -------------------- Dev Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05107997664915353, 'epoch': 9} Test Chinese: {'template': {'p': 0.9090909090909091, 'r': 0.5511811023622047, 'f1': 0.6862745098039216}, 'slot': {'p': 0.5581395348837209, 'r': 0.020761245674740483, 'f1': 0.040033361134278564}, 'combined': 0.027473875288230386, 'epoch': 9} Dev Korean: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05107997664915353, 'epoch': 9} Test Korean: {'template': {'p': 0.9090909090909091, 'r': 0.5511811023622047, 'f1': 0.6862745098039216}, 'slot': 
{'p': 0.5365853658536586, 'r': 0.01903114186851211, 'f1': 0.036758563074352546}, 'combined': 0.025226464854947825, 'epoch': 9} Dev Russian: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05107997664915353, 'epoch': 9} Test Russian: {'template': {'p': 0.9090909090909091, 'r': 0.5511811023622047, 'f1': 0.6862745098039216}, 'slot': {'p': 0.5581395348837209, 'r': 0.020761245674740483, 'f1': 0.040033361134278564}, 'combined': 0.027473875288230386, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 10 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:51:57.094308: step: 4/77, loss: 0.034885916858911514 2023-01-23 22:51:58.318022: step: 8/77, loss: 0.025922216475009918 2023-01-23 22:51:59.656553: step: 12/77, loss: 
0.018966445699334145 2023-01-23 22:52:00.953795: step: 16/77, loss: 0.007525881752371788 2023-01-23 22:52:02.193516: step: 20/77, loss: 0.00297100399620831 2023-01-23 22:52:03.478511: step: 24/77, loss: 0.022766780108213425 2023-01-23 22:52:04.754611: step: 28/77, loss: 0.005847464315593243 2023-01-23 22:52:06.059635: step: 32/77, loss: 0.0022809680085629225 2023-01-23 22:52:07.304171: step: 36/77, loss: 0.0008194573456421494 2023-01-23 22:52:08.593227: step: 40/77, loss: 0.007935411296784878 2023-01-23 22:52:09.878907: step: 44/77, loss: 0.01920030452311039 2023-01-23 22:52:11.163795: step: 48/77, loss: 0.02657415345311165 2023-01-23 22:52:12.445332: step: 52/77, loss: 0.0011227937648072839 2023-01-23 22:52:13.721940: step: 56/77, loss: 0.0020502624101936817 2023-01-23 22:52:14.985695: step: 60/77, loss: 0.04245453327894211 2023-01-23 22:52:16.306704: step: 64/77, loss: 0.0015092970570549369 2023-01-23 22:52:17.521496: step: 68/77, loss: 0.022562090307474136 2023-01-23 22:52:18.849950: step: 72/77, loss: 0.02246209979057312 2023-01-23 22:52:20.116189: step: 76/77, loss: 0.00920990388840437 2023-01-23 22:52:21.359315: step: 80/77, loss: 8.592795893491711e-06 2023-01-23 22:52:22.608452: step: 84/77, loss: 0.0023487601429224014 2023-01-23 22:52:23.911055: step: 88/77, loss: 0.010793939232826233 2023-01-23 22:52:25.201746: step: 92/77, loss: 0.11554623395204544 2023-01-23 22:52:26.431510: step: 96/77, loss: 0.01655627228319645 2023-01-23 22:52:27.685016: step: 100/77, loss: 0.01300885435193777 2023-01-23 22:52:28.928022: step: 104/77, loss: 0.028351565822958946 2023-01-23 22:52:30.251719: step: 108/77, loss: 0.0009378978866152465 2023-01-23 22:52:31.509834: step: 112/77, loss: 0.02903454378247261 2023-01-23 22:52:32.772521: step: 116/77, loss: 0.000774328364059329 2023-01-23 22:52:34.043170: step: 120/77, loss: 0.00030405400320887566 2023-01-23 22:52:35.345319: step: 124/77, loss: 0.007459406740963459 2023-01-23 22:52:36.623482: step: 128/77, loss: 0.031962983310222626 2023-01-23 22:52:37.950414: step: 132/77, loss: 0.0007741327863186598 2023-01-23 22:52:39.258440: step: 136/77, loss: 0.001975886756554246 2023-01-23 22:52:40.585532: step: 140/77, loss: 0.0006277412758208811 2023-01-23 22:52:41.891100: step: 144/77, loss: 0.00018086688942275941 2023-01-23 22:52:43.190738: step: 148/77, loss: 0.025780564174056053 2023-01-23 22:52:44.459217: step: 152/77, loss: 0.0059568691067397594 2023-01-23 22:52:45.740158: step: 156/77, loss: 0.0019144975813105702 2023-01-23 22:52:47.014056: step: 160/77, loss: 0.031837016344070435 2023-01-23 22:52:48.359359: step: 164/77, loss: 0.0005284567596390843 2023-01-23 22:52:49.635977: step: 168/77, loss: 0.006015297025442123 2023-01-23 22:52:50.957816: step: 172/77, loss: 0.0005159162101335824 2023-01-23 22:52:52.242395: step: 176/77, loss: 0.03496188297867775 2023-01-23 22:52:53.528713: step: 180/77, loss: 0.021903792396187782 2023-01-23 22:52:54.802419: step: 184/77, loss: 0.00761670945212245 2023-01-23 22:52:56.079648: step: 188/77, loss: 0.02137245237827301 2023-01-23 22:52:57.339203: step: 192/77, loss: 0.007971450686454773 2023-01-23 22:52:58.600821: step: 196/77, loss: 0.005901147145777941 2023-01-23 22:52:59.953354: step: 200/77, loss: 0.014218917116522789 2023-01-23 22:53:01.234812: step: 204/77, loss: 0.021402429789304733 2023-01-23 22:53:02.495882: step: 208/77, loss: 0.04705355688929558 2023-01-23 22:53:03.766297: step: 212/77, loss: 0.04193755239248276 2023-01-23 22:53:05.035973: step: 216/77, loss: 0.009806877002120018 2023-01-23 22:53:06.333669: 
step: 220/77, loss: 0.012827225029468536 2023-01-23 22:53:07.625442: step: 224/77, loss: 0.016613418236374855 2023-01-23 22:53:08.948730: step: 228/77, loss: 0.04089561104774475 2023-01-23 22:53:10.237455: step: 232/77, loss: 0.022792337462306023 2023-01-23 22:53:11.502323: step: 236/77, loss: 0.0008552936487831175 2023-01-23 22:53:12.784245: step: 240/77, loss: 0.01568450964987278 2023-01-23 22:53:14.068402: step: 244/77, loss: 0.007928731851279736 2023-01-23 22:53:15.328939: step: 248/77, loss: 3.1405874324264005e-05 2023-01-23 22:53:16.622756: step: 252/77, loss: 0.006512498948723078 2023-01-23 22:53:17.901637: step: 256/77, loss: 0.00061405188171193 2023-01-23 22:53:19.243224: step: 260/77, loss: 0.010790829546749592 2023-01-23 22:53:20.607694: step: 264/77, loss: 0.015889519825577736 2023-01-23 22:53:21.916995: step: 268/77, loss: 0.032671183347702026 2023-01-23 22:53:23.145734: step: 272/77, loss: 0.08655279874801636 2023-01-23 22:53:24.445946: step: 276/77, loss: 0.009039109572768211 2023-01-23 22:53:25.668005: step: 280/77, loss: 0.010499601252377033 2023-01-23 22:53:26.980730: step: 284/77, loss: 0.0036574043333530426 2023-01-23 22:53:28.292004: step: 288/77, loss: 0.022612404078245163 2023-01-23 22:53:29.570507: step: 292/77, loss: 0.000292752287350595 2023-01-23 22:53:30.806880: step: 296/77, loss: 0.0065917023457586765 2023-01-23 22:53:32.077702: step: 300/77, loss: 0.0009956855792552233 2023-01-23 22:53:33.388101: step: 304/77, loss: 0.006485821679234505 2023-01-23 22:53:34.626139: step: 308/77, loss: 0.060920167714357376 2023-01-23 22:53:35.884542: step: 312/77, loss: 0.003009687177836895 2023-01-23 22:53:37.157738: step: 316/77, loss: 0.09430146217346191 2023-01-23 22:53:38.468887: step: 320/77, loss: 0.013427263125777245 2023-01-23 22:53:39.733081: step: 324/77, loss: 0.041191231459379196 2023-01-23 22:53:41.011110: step: 328/77, loss: 0.05467259883880615 2023-01-23 22:53:42.260846: step: 332/77, loss: 0.0028161683585494757 2023-01-23 22:53:43.529314: step: 336/77, loss: 0.01637094095349312 2023-01-23 22:53:44.797705: step: 340/77, loss: 0.05374343320727348 2023-01-23 22:53:46.111821: step: 344/77, loss: 0.024343477562069893 2023-01-23 22:53:47.371793: step: 348/77, loss: 0.020151842385530472 2023-01-23 22:53:48.711775: step: 352/77, loss: 0.06068253517150879 2023-01-23 22:53:50.014660: step: 356/77, loss: 0.01416888926178217 2023-01-23 22:53:51.312992: step: 360/77, loss: 0.025863591581583023 2023-01-23 22:53:52.589718: step: 364/77, loss: 0.006620073691010475 2023-01-23 22:53:53.875038: step: 368/77, loss: 0.007929705083370209 2023-01-23 22:53:55.179138: step: 372/77, loss: 2.493147985660471e-05 2023-01-23 22:53:56.517633: step: 376/77, loss: 0.07580535858869553 2023-01-23 22:53:57.850014: step: 380/77, loss: 0.006525078788399696 2023-01-23 22:53:59.117949: step: 384/77, loss: 0.004044658504426479 2023-01-23 22:54:00.394945: step: 388/77, loss: 0.14865602552890778 ================================================== Loss: 0.020 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04988944951527864, 'epoch': 10} Test Chinese: {'template': {'p': 0.9565217391304348, 'r': 0.5196850393700787, 'f1': 0.673469387755102}, 'slot': {'p': 0.5714285714285714, 'r': 0.01730103806228374, 'f1': 0.033585222502099076}, 'combined': 0.02261861923610754, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04988944951527864, 'epoch': 10} Test Korean: {'template': {'p': 0.9565217391304348, 'r': 0.5196850393700787, 'f1': 0.673469387755102}, 'slot': {'p': 0.5714285714285714, 'r': 0.01730103806228374, 'f1': 0.033585222502099076}, 'combined': 0.02261861923610754, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04988944951527864, 'epoch': 10} Test Russian: {'template': {'p': 0.9571428571428572, 'r': 0.5275590551181102, 'f1': 0.680203045685279}, 'slot': {'p': 0.5714285714285714, 'r': 0.01730103806228374, 'f1': 0.033585222502099076}, 'combined': 0.022844770635945557, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 11 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 
22:55:40.840629: step: 4/77, loss: 0.034290995448827744 2023-01-23 22:55:42.171422: step: 8/77, loss: 0.003060400253161788 2023-01-23 22:55:43.452422: step: 12/77, loss: 0.016873084008693695 2023-01-23 22:55:44.758711: step: 16/77, loss: 0.044476039707660675 2023-01-23 22:55:46.043466: step: 20/77, loss: 0.1535889059305191 2023-01-23 22:55:47.321860: step: 24/77, loss: 0.02128823660314083 2023-01-23 22:55:48.611566: step: 28/77, loss: 0.005422608926892281 2023-01-23 22:55:49.925842: step: 32/77, loss: 0.0096100103110075 2023-01-23 22:55:51.222763: step: 36/77, loss: 0.043695032596588135 2023-01-23 22:55:52.449153: step: 40/77, loss: 0.07770724594593048 2023-01-23 22:55:53.712108: step: 44/77, loss: 0.0007819668389856815 2023-01-23 22:55:54.995549: step: 48/77, loss: 0.014020076021552086 2023-01-23 22:55:56.224418: step: 52/77, loss: 0.008628785610198975 2023-01-23 22:55:57.515250: step: 56/77, loss: 0.0005642471369355917 2023-01-23 22:55:58.846818: step: 60/77, loss: 0.006489362567663193 2023-01-23 22:56:00.134131: step: 64/77, loss: 0.0010391109390184283 2023-01-23 22:56:01.395916: step: 68/77, loss: 0.0034049940295517445 2023-01-23 22:56:02.718377: step: 72/77, loss: 0.021129030734300613 2023-01-23 22:56:04.022350: step: 76/77, loss: 0.000404253660235554 2023-01-23 22:56:05.295191: step: 80/77, loss: 0.029367826879024506 2023-01-23 22:56:06.577840: step: 84/77, loss: 0.000733956869225949 2023-01-23 22:56:07.878138: step: 88/77, loss: 0.015665430575609207 2023-01-23 22:56:09.166531: step: 92/77, loss: 0.0027978597208857536 2023-01-23 22:56:10.487948: step: 96/77, loss: 0.04804677516222 2023-01-23 22:56:11.743390: step: 100/77, loss: 0.006995899602770805 2023-01-23 22:56:13.004129: step: 104/77, loss: 0.0016960708890110254 2023-01-23 22:56:14.271018: step: 108/77, loss: 0.02806232124567032 2023-01-23 22:56:15.579727: step: 112/77, loss: 0.040813934057950974 2023-01-23 22:56:16.878326: step: 116/77, loss: 0.04757900536060333 2023-01-23 22:56:18.143456: step: 120/77, loss: 0.06963416934013367 2023-01-23 22:56:19.394750: step: 124/77, loss: 0.008742544800043106 2023-01-23 22:56:20.645368: step: 128/77, loss: 0.0003210227587260306 2023-01-23 22:56:21.954721: step: 132/77, loss: 0.03588179498910904 2023-01-23 22:56:23.248504: step: 136/77, loss: 0.00010144505358766764 2023-01-23 22:56:24.526825: step: 140/77, loss: 0.028663935139775276 2023-01-23 22:56:25.801684: step: 144/77, loss: 0.004867684096097946 2023-01-23 22:56:27.074145: step: 148/77, loss: 0.006418164353817701 2023-01-23 22:56:28.377520: step: 152/77, loss: 0.010072818025946617 2023-01-23 22:56:29.687549: step: 156/77, loss: 0.0030261133797466755 2023-01-23 22:56:30.997347: step: 160/77, loss: 0.012433314695954323 2023-01-23 22:56:32.291173: step: 164/77, loss: 1.3659241631103214e-05 2023-01-23 22:56:33.564879: step: 168/77, loss: 0.010078934952616692 2023-01-23 22:56:34.871773: step: 172/77, loss: 0.008565003052353859 2023-01-23 22:56:36.170608: step: 176/77, loss: 0.0013229025062173605 2023-01-23 22:56:37.463849: step: 180/77, loss: 0.0023587632458657026 2023-01-23 22:56:38.767225: step: 184/77, loss: 0.015727929770946503 2023-01-23 22:56:40.061064: step: 188/77, loss: 0.0003538941964507103 2023-01-23 22:56:41.334168: step: 192/77, loss: 0.005967576522380114 2023-01-23 22:56:42.576571: step: 196/77, loss: 0.0034459210000932217 2023-01-23 22:56:43.840862: step: 200/77, loss: 0.0008628435316495597 2023-01-23 22:56:45.139268: step: 204/77, loss: 0.003831970738247037 2023-01-23 22:56:46.434271: step: 208/77, loss: 0.04347836226224899 
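
Note on the per-language summaries: after each epoch's steps, the log prints Dev/Test/Sample blocks for Chinese, Korean, and Russian, each reporting template and slot precision/recall/F1 plus a 'combined' score, and then re-prints the running "Current best result" (still epoch 2 throughout this stretch; later epochs that only tie its dev 'combined' of 0.05179909351586346, as epochs 7 and 8 above and 11 and 12 below do, evidently do not displace it, so the comparison appears to be strictly greater-than). The logged numbers are consistent with standard F1 and with 'combined' being the product of the two F1 values; the sketch below reproduces the recurring Dev figures under that reading. This is an inference from the logged values, not from the project's source, and f1 is a hypothetical helper.

def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall; defined as 0.0 when p + r == 0."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

template_f1 = f1(1.0, 0.5833333333333334)   # 0.7368421052631579, as logged
slot_f1 = f1(0.5, 0.03780718336483932)      # 0.07029876977152899, as logged
combined = template_f1 * slot_f1            # 0.05179909351586346, as logged

Under this reading, a slot F1 of 0.0 (as in the Korean sample rows) always drives 'combined' to 0.0 regardless of how high the template score is, which matches every such row in the log.
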
2023-01-23 22:56:47.782876: step: 212/77, loss: 0.03995771333575249 2023-01-23 22:56:49.051797: step: 216/77, loss: 0.08100616931915283 2023-01-23 22:56:50.402354: step: 220/77, loss: 0.0014499538810923696 2023-01-23 22:56:51.662443: step: 224/77, loss: 0.016846509650349617 2023-01-23 22:56:53.020994: step: 228/77, loss: 0.001104260329157114 2023-01-23 22:56:54.283677: step: 232/77, loss: 0.007652912754565477 2023-01-23 22:56:55.629826: step: 236/77, loss: 0.00373618071898818 2023-01-23 22:56:56.950829: step: 240/77, loss: 0.03783169016242027 2023-01-23 22:56:58.230799: step: 244/77, loss: 0.01822231523692608 2023-01-23 22:56:59.578156: step: 248/77, loss: 0.00047604707651771605 2023-01-23 22:57:00.878720: step: 252/77, loss: 0.04717876762151718 2023-01-23 22:57:02.175400: step: 256/77, loss: 0.00917772762477398 2023-01-23 22:57:03.434454: step: 260/77, loss: 0.008899427019059658 2023-01-23 22:57:04.705821: step: 264/77, loss: 0.0171928983181715 2023-01-23 22:57:05.990552: step: 268/77, loss: 0.0008396904449909925 2023-01-23 22:57:07.297970: step: 272/77, loss: 0.0042204721830785275 2023-01-23 22:57:08.600605: step: 276/77, loss: 0.09980832785367966 2023-01-23 22:57:09.905065: step: 280/77, loss: 0.02319612167775631 2023-01-23 22:57:11.196188: step: 284/77, loss: 0.045040421187877655 2023-01-23 22:57:12.496337: step: 288/77, loss: 1.880646959762089e-05 2023-01-23 22:57:13.759609: step: 292/77, loss: 0.04108387976884842 2023-01-23 22:57:15.037602: step: 296/77, loss: 0.012998824939131737 2023-01-23 22:57:16.294702: step: 300/77, loss: 0.007574758492410183 2023-01-23 22:57:17.598159: step: 304/77, loss: 0.025194745510816574 2023-01-23 22:57:18.849258: step: 308/77, loss: 0.0016742986626923084 2023-01-23 22:57:20.168887: step: 312/77, loss: 0.006551905535161495 2023-01-23 22:57:21.496273: step: 316/77, loss: 0.0009671220905147493 2023-01-23 22:57:22.847194: step: 320/77, loss: 0.01129376981407404 2023-01-23 22:57:24.150883: step: 324/77, loss: 0.028160715475678444 2023-01-23 22:57:25.421669: step: 328/77, loss: 0.008353251032531261 2023-01-23 22:57:26.732804: step: 332/77, loss: 0.012021275237202644 2023-01-23 22:57:28.065487: step: 336/77, loss: 0.005083122290670872 2023-01-23 22:57:29.356931: step: 340/77, loss: 0.011651956476271152 2023-01-23 22:57:30.601145: step: 344/77, loss: 0.008507365360856056 2023-01-23 22:57:31.865185: step: 348/77, loss: 3.128040407318622e-05 2023-01-23 22:57:33.132234: step: 352/77, loss: 0.003318456234410405 2023-01-23 22:57:34.460054: step: 356/77, loss: 0.022203654050827026 2023-01-23 22:57:35.731177: step: 360/77, loss: 0.007937486283481121 2023-01-23 22:57:36.995844: step: 364/77, loss: 0.01284043863415718 2023-01-23 22:57:38.293357: step: 368/77, loss: 0.001803778694011271 2023-01-23 22:57:39.574026: step: 372/77, loss: 0.03172057494521141 2023-01-23 22:57:40.868112: step: 376/77, loss: 0.009788584895431995 2023-01-23 22:57:42.149376: step: 380/77, loss: 0.020835664123296738 2023-01-23 22:57:43.454028: step: 384/77, loss: 0.012116208672523499 2023-01-23 22:57:44.725295: step: 388/77, loss: 0.018130645155906677 ================================================== Loss: 0.018 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Chinese: {'template': {'p': 0.9240506329113924, 'r': 0.5748031496062992, 'f1': 0.7087378640776699}, 'slot': {'p': 0.574468085106383, 'r': 
0.023356401384083045, 'f1': 0.044887780548628436}, 'combined': 0.031813669709222094, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Korean: {'template': {'p': 0.9012345679012346, 'r': 0.5748031496062992, 'f1': 0.701923076923077}, 'slot': {'p': 0.574468085106383, 'r': 0.023356401384083045, 'f1': 0.044887780548628436}, 'combined': 0.03150776903894112, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Russian: {'template': {'p': 0.9240506329113924, 'r': 0.5748031496062992, 'f1': 0.7087378640776699}, 'slot': {'p': 0.574468085106383, 'r': 0.023356401384083045, 'f1': 0.044887780548628436}, 'combined': 0.031813669709222094, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 12 command: python train.py 
--model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:59:24.924528: step: 4/77, loss: 0.009073804132640362 2023-01-23 22:59:26.209492: step: 8/77, loss: 0.009715680964291096 2023-01-23 22:59:27.484325: step: 12/77, loss: 0.06424564123153687 2023-01-23 22:59:28.752915: step: 16/77, loss: 0.06681496649980545 2023-01-23 22:59:29.995888: step: 20/77, loss: 0.002531634643673897 2023-01-23 22:59:31.242314: step: 24/77, loss: 0.0005347937112674117 2023-01-23 22:59:32.527257: step: 28/77, loss: 0.04869755357503891 2023-01-23 22:59:33.842477: step: 32/77, loss: 0.051517218351364136 2023-01-23 22:59:35.123467: step: 36/77, loss: 0.008260335773229599 2023-01-23 22:59:36.402620: step: 40/77, loss: 3.5608525649877265e-05 2023-01-23 22:59:37.674386: step: 44/77, loss: 0.03319783881306648 2023-01-23 22:59:38.968688: step: 48/77, loss: 0.0017925648717209697 2023-01-23 22:59:40.236974: step: 52/77, loss: 0.00929616391658783 2023-01-23 22:59:41.505860: step: 56/77, loss: 0.0027852028142660856 2023-01-23 22:59:42.802063: step: 60/77, loss: 0.020751886069774628 2023-01-23 22:59:44.106445: step: 64/77, loss: 0.0002482630079612136 2023-01-23 22:59:45.407384: step: 68/77, loss: 0.0005034186178818345 2023-01-23 22:59:46.690963: step: 72/77, loss: 0.00035241639125160873 2023-01-23 22:59:47.942075: step: 76/77, loss: 0.00031961730564944446 2023-01-23 22:59:49.263210: step: 80/77, loss: 0.007452554069459438 2023-01-23 22:59:50.544793: step: 84/77, loss: 0.007456920575350523 2023-01-23 22:59:51.811290: step: 88/77, loss: 0.009758615866303444 2023-01-23 22:59:53.091566: step: 92/77, loss: 0.0012029914651066065 2023-01-23 22:59:54.348137: step: 96/77, loss: 3.199763159500435e-05 2023-01-23 22:59:55.598549: step: 100/77, loss: 0.04619689658284187 2023-01-23 22:59:56.906179: step: 104/77, loss: 0.009478705003857613 2023-01-23 22:59:58.251885: step: 108/77, loss: 8.02664362709038e-05 2023-01-23 22:59:59.544098: step: 112/77, loss: 0.004843482282012701 2023-01-23 23:00:00.809145: step: 116/77, loss: 0.003468831069767475 2023-01-23 23:00:02.070137: step: 120/77, loss: 0.02320878580212593 2023-01-23 23:00:03.384619: step: 124/77, loss: 5.919792329223128e-06 2023-01-23 23:00:04.644432: step: 128/77, loss: 0.034282077103853226 2023-01-23 23:00:05.922873: step: 132/77, loss: 0.0011821096995845437 2023-01-23 23:00:07.231016: step: 136/77, loss: 0.030070718377828598 2023-01-23 23:00:08.491273: step: 140/77, loss: 0.004016416613012552 2023-01-23 23:00:09.743333: step: 144/77, loss: 0.005289752967655659 2023-01-23 23:00:11.016361: step: 148/77, loss: 0.001714541227556765 2023-01-23 23:00:12.302340: step: 152/77, loss: 0.001026333775371313 2023-01-23 23:00:13.601637: step: 156/77, loss: 7.020994871709263e-06 2023-01-23 23:00:14.880777: step: 160/77, loss: 0.0012839919654652476 2023-01-23 23:00:16.159369: step: 164/77, loss: 0.00011994199303444475 2023-01-23 23:00:17.425893: step: 168/77, loss: 0.0008939295657910407 2023-01-23 23:00:18.733714: step: 172/77, loss: 0.00872259121388197 2023-01-23 23:00:20.058156: step: 176/77, loss: 9.06147306523053e-06 2023-01-23 23:00:21.346547: step: 180/77, loss: 0.007759299129247665 2023-01-23 23:00:22.621003: step: 184/77, loss: 0.08365102112293243 2023-01-23 23:00:23.897837: step: 188/77, loss: 0.010917061939835548 2023-01-23 23:00:25.130826: step: 192/77, loss: 0.008356427773833275 2023-01-23 23:00:26.377004: step: 196/77, loss: 
0.0070846471935510635 2023-01-23 23:00:27.639543: step: 200/77, loss: 0.018262486904859543 2023-01-23 23:00:28.896215: step: 204/77, loss: 0.019870104268193245 2023-01-23 23:00:30.180714: step: 208/77, loss: 5.0123155233450234e-05 2023-01-23 23:00:31.449787: step: 212/77, loss: 0.03604806587100029 2023-01-23 23:00:32.703400: step: 216/77, loss: 0.0016053176950663328 2023-01-23 23:00:34.011246: step: 220/77, loss: 0.03923279047012329 2023-01-23 23:00:35.295001: step: 224/77, loss: 0.1039530336856842 2023-01-23 23:00:36.591885: step: 228/77, loss: 0.0012448562774807215 2023-01-23 23:00:37.877451: step: 232/77, loss: 0.0024955361150205135 2023-01-23 23:00:39.121231: step: 236/77, loss: 0.009963253512978554 2023-01-23 23:00:40.392506: step: 240/77, loss: 0.04958149790763855 2023-01-23 23:00:41.689278: step: 244/77, loss: 0.0008791973232291639 2023-01-23 23:00:43.028619: step: 248/77, loss: 0.05147349089384079 2023-01-23 23:00:44.343709: step: 252/77, loss: 0.00014931612531654537 2023-01-23 23:00:45.598177: step: 256/77, loss: 0.016675325110554695 2023-01-23 23:00:46.868747: step: 260/77, loss: 0.014337164349853992 2023-01-23 23:00:48.170389: step: 264/77, loss: 0.018950382247567177 2023-01-23 23:00:49.489911: step: 268/77, loss: 7.146938150981441e-05 2023-01-23 23:00:50.753527: step: 272/77, loss: 0.017762595787644386 2023-01-23 23:00:52.098199: step: 276/77, loss: 0.00015441945288330317 2023-01-23 23:00:53.400211: step: 280/77, loss: 0.0028129005804657936 2023-01-23 23:00:54.724417: step: 284/77, loss: 0.14359217882156372 2023-01-23 23:00:56.007799: step: 288/77, loss: 0.0016476346645504236 2023-01-23 23:00:57.356975: step: 292/77, loss: 0.0030373530462384224 2023-01-23 23:00:58.679594: step: 296/77, loss: 0.001313269603997469 2023-01-23 23:00:59.957890: step: 300/77, loss: 0.026861613616347313 2023-01-23 23:01:01.203158: step: 304/77, loss: 0.01838192157447338 2023-01-23 23:01:02.490514: step: 308/77, loss: 0.009071671403944492 2023-01-23 23:01:03.756931: step: 312/77, loss: 0.018952257931232452 2023-01-23 23:01:05.008011: step: 316/77, loss: 0.02215559035539627 2023-01-23 23:01:06.337553: step: 320/77, loss: 0.0001870796550065279 2023-01-23 23:01:07.641635: step: 324/77, loss: 0.0033369308803230524 2023-01-23 23:01:08.933303: step: 328/77, loss: 0.015525397844612598 2023-01-23 23:01:10.213995: step: 332/77, loss: 0.003829201217740774 2023-01-23 23:01:11.489678: step: 336/77, loss: 0.011689679697155952 2023-01-23 23:01:12.775331: step: 340/77, loss: 0.00465412437915802 2023-01-23 23:01:14.056925: step: 344/77, loss: 0.010710624977946281 2023-01-23 23:01:15.346587: step: 348/77, loss: 0.005850036628544331 2023-01-23 23:01:16.644499: step: 352/77, loss: 0.0008918998064473271 2023-01-23 23:01:17.909955: step: 356/77, loss: 0.007814115844666958 2023-01-23 23:01:19.233663: step: 360/77, loss: 0.018171032890677452 2023-01-23 23:01:20.540894: step: 364/77, loss: 0.0254492349922657 2023-01-23 23:01:21.833751: step: 368/77, loss: 0.0004618678358383477 2023-01-23 23:01:23.060310: step: 372/77, loss: 0.03245954215526581 2023-01-23 23:01:24.350637: step: 376/77, loss: 0.013895172625780106 2023-01-23 23:01:25.609915: step: 380/77, loss: 0.00014311702398117632 2023-01-23 23:01:26.915916: step: 384/77, loss: 0.07297346740961075 2023-01-23 23:01:28.235210: step: 388/77, loss: 0.017625009641051292 ================================================== Loss: 0.016 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Chinese: {'template': {'p': 0.922077922077922, 'r': 0.5590551181102362, 'f1': 0.696078431372549}, 'slot': {'p': 0.5952380952380952, 'r': 0.02162629757785467, 'f1': 0.041736227045075125}, 'combined': 0.02905168745294445, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Korean: {'template': {'p': 0.922077922077922, 'r': 0.5590551181102362, 'f1': 0.696078431372549}, 'slot': {'p': 0.5813953488372093, 'r': 0.02162629757785467, 'f1': 0.041701417848206836}, 'combined': 0.02902745752179103, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Russian: {'template': {'p': 0.922077922077922, 'r': 0.5590551181102362, 'f1': 0.696078431372549}, 'slot': {'p': 0.5813953488372093, 'r': 0.02162629757785467, 'f1': 0.041701417848206836}, 'combined': 0.02902745752179103, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 
'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 13 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:03:08.498993: step: 4/77, loss: 0.011534569784998894 2023-01-23 23:03:09.752093: step: 8/77, loss: 7.703698065597564e-05 2023-01-23 23:03:11.007008: step: 12/77, loss: 0.0074141984805464745 2023-01-23 23:03:12.269865: step: 16/77, loss: 0.003941691946238279 2023-01-23 23:03:13.495835: step: 20/77, loss: 0.007250982336699963 2023-01-23 23:03:14.785323: step: 24/77, loss: 0.024784471839666367 2023-01-23 23:03:16.040019: step: 28/77, loss: 0.0064056264236569405 2023-01-23 23:03:17.373354: step: 32/77, loss: 0.0310469102114439 2023-01-23 23:03:18.664688: step: 36/77, loss: 0.017922699451446533 2023-01-23 23:03:19.888996: step: 40/77, loss: 0.01132173277437687 2023-01-23 23:03:21.217529: step: 44/77, loss: 0.0026819310151040554 2023-01-23 23:03:22.502491: step: 48/77, loss: 0.0073762321844697 2023-01-23 23:03:23.833497: step: 52/77, loss: 0.0018268930725753307 2023-01-23 23:03:25.143185: step: 56/77, loss: 0.012388748116791248 2023-01-23 23:03:26.434366: step: 60/77, loss: 0.0009196172468364239 2023-01-23 23:03:27.753690: step: 64/77, loss: 0.0004850560799241066 2023-01-23 23:03:29.125518: step: 68/77, loss: 0.03814251720905304 2023-01-23 23:03:30.424620: step: 72/77, loss: 0.024647535756230354 2023-01-23 23:03:31.697051: step: 76/77, loss: 0.006834662053734064 2023-01-23 23:03:32.965420: step: 80/77, loss: 0.014149656519293785 2023-01-23 23:03:34.266377: step: 84/77, loss: 0.029407711699604988 2023-01-23 23:03:35.501194: step: 88/77, loss: 0.054918814450502396 2023-01-23 23:03:36.761043: step: 92/77, loss: 0.019674647599458694 2023-01-23 23:03:38.055527: step: 96/77, loss: 0.0006954215932637453 2023-01-23 23:03:39.349411: step: 100/77, loss: 3.290568929514848e-05 2023-01-23 23:03:40.648875: step: 104/77, loss: 0.00822521187365055 2023-01-23 23:03:41.972647: step: 108/77, loss: 0.0008668411173857749 2023-01-23 23:03:43.257701: step: 112/77, loss: 0.017215151339769363 2023-01-23 23:03:44.601049: step: 116/77, loss: 0.0025849805679172277 2023-01-23 23:03:45.893279: step: 120/77, loss: 0.012226728722453117 2023-01-23 23:03:47.255920: step: 124/77, loss: 0.0035495597403496504 2023-01-23 23:03:48.491415: step: 128/77, loss: 0.005428432486951351 2023-01-23 23:03:49.775577: step: 132/77, loss: 0.008224151097238064 2023-01-23 23:03:51.012397: step: 136/77, loss: 0.039925575256347656 2023-01-23 23:03:52.353911: step: 140/77, loss: 0.02613389492034912 2023-01-23 23:03:53.627917: step: 144/77, loss: 0.004684390965849161 2023-01-23 23:03:54.910297: step: 148/77, loss: 0.0002262677444377914 2023-01-23 23:03:56.223008: step: 152/77, loss: 0.012023281306028366 2023-01-23 23:03:57.490585: step: 156/77, loss: 0.04951193183660507 2023-01-23 23:03:58.785916: step: 160/77, loss: 0.008101816289126873 2023-01-23 23:03:59.986933: step: 164/77, loss: 0.008501788601279259 2023-01-23 23:04:01.281150: step: 168/77, loss: 0.013952715322375298 2023-01-23 23:04:02.573389: step: 172/77, loss: 0.00011134293890791014 2023-01-23 23:04:03.887494: step: 176/77, loss: 0.004148687236011028 2023-01-23 23:04:05.151849: step: 180/77, loss: 0.0027915844693779945 2023-01-23 23:04:06.398758: step: 184/77, loss: 0.03257248178124428 2023-01-23 23:04:07.704052: step: 
188/77, loss: 0.014188706874847412 2023-01-23 23:04:08.995227: step: 192/77, loss: 0.0040221610106527805 2023-01-23 23:04:10.331190: step: 196/77, loss: 0.038398560136556625 2023-01-23 23:04:11.633286: step: 200/77, loss: 2.7238772872806294e-06 2023-01-23 23:04:12.957587: step: 204/77, loss: 0.005252287723124027 2023-01-23 23:04:14.262306: step: 208/77, loss: 0.00662798760458827 2023-01-23 23:04:15.542056: step: 212/77, loss: 0.0014862954849377275 2023-01-23 23:04:16.829206: step: 216/77, loss: 0.06251075118780136 2023-01-23 23:04:18.132246: step: 220/77, loss: 0.04855694621801376 2023-01-23 23:04:19.419788: step: 224/77, loss: 8.610729855718091e-05 2023-01-23 23:04:20.730885: step: 228/77, loss: 0.00452599348500371 2023-01-23 23:04:21.997701: step: 232/77, loss: 0.003628335427492857 2023-01-23 23:04:23.231972: step: 236/77, loss: 0.001405022805556655 2023-01-23 23:04:24.515587: step: 240/77, loss: 0.0004565988201647997 2023-01-23 23:04:25.786268: step: 244/77, loss: 5.832927854498848e-05 2023-01-23 23:04:27.093397: step: 248/77, loss: 0.001974995480850339 2023-01-23 23:04:28.390637: step: 252/77, loss: 0.006026091054081917 2023-01-23 23:04:29.715374: step: 256/77, loss: 0.0033578251022845507 2023-01-23 23:04:31.011465: step: 260/77, loss: 0.049207091331481934 2023-01-23 23:04:32.295167: step: 264/77, loss: 0.006522368639707565 2023-01-23 23:04:33.558825: step: 268/77, loss: 0.02521488070487976 2023-01-23 23:04:34.825151: step: 272/77, loss: 0.035877011716365814 2023-01-23 23:04:36.129909: step: 276/77, loss: 0.006761828437447548 2023-01-23 23:04:37.447971: step: 280/77, loss: 0.0008893448393791914 2023-01-23 23:04:38.751360: step: 284/77, loss: 0.07028082758188248 2023-01-23 23:04:40.014519: step: 288/77, loss: 1.4272099178924691e-05 2023-01-23 23:04:41.305092: step: 292/77, loss: 0.024424489587545395 2023-01-23 23:04:42.564542: step: 296/77, loss: 0.005749974399805069 2023-01-23 23:04:43.887371: step: 300/77, loss: 0.00023848118144087493 2023-01-23 23:04:45.182468: step: 304/77, loss: 0.0036856308579444885 2023-01-23 23:04:46.478871: step: 308/77, loss: 0.03618558123707771 2023-01-23 23:04:47.817352: step: 312/77, loss: 0.0694272518157959 2023-01-23 23:04:49.140471: step: 316/77, loss: 0.1101406142115593 2023-01-23 23:04:50.448733: step: 320/77, loss: 0.0017924606800079346 2023-01-23 23:04:51.734151: step: 324/77, loss: 0.0023241147864609957 2023-01-23 23:04:53.047628: step: 328/77, loss: 0.0022019895259290934 2023-01-23 23:04:54.347189: step: 332/77, loss: 0.0007412676350213587 2023-01-23 23:04:55.622709: step: 336/77, loss: 0.03281879797577858 2023-01-23 23:04:56.879960: step: 340/77, loss: 0.0045754555612802505 2023-01-23 23:04:58.144314: step: 344/77, loss: 0.009205389767885208 2023-01-23 23:04:59.410595: step: 348/77, loss: 0.032856326550245285 2023-01-23 23:05:00.715821: step: 352/77, loss: 0.0010040453635156155 2023-01-23 23:05:02.026997: step: 356/77, loss: 2.4541625407437095e-06 2023-01-23 23:05:03.288299: step: 360/77, loss: 0.026075957342982292 2023-01-23 23:05:04.510020: step: 364/77, loss: 0.0016539209755137563 2023-01-23 23:05:05.800049: step: 368/77, loss: 0.00030679808696731925 2023-01-23 23:05:07.054096: step: 372/77, loss: 0.0001105769770219922 2023-01-23 23:05:08.384970: step: 376/77, loss: 0.01804657094180584 2023-01-23 23:05:09.686237: step: 380/77, loss: 0.034142449498176575 2023-01-23 23:05:10.991273: step: 384/77, loss: 0.00044037040788680315 2023-01-23 23:05:12.244735: step: 388/77, loss: 0.00038718071300536394 ================================================== 
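--------------------
The step lines above are machine-parseable, and the summary numbers that follow can be re-derived from them: the step counter advances by 4 per logged record (consistent with --accumulate_step 4; note it runs past its "/77" denominator, up to 388/77), the per-epoch "Loss:" line appears to be the mean of that epoch's logged step losses, and the "combined" field in each result dict matches the product of the template F1 and the slot F1 (e.g. 0.7368421052631579 * 0.07029876977152899 ≈ 0.05179909351586346, the dev score reported below). A minimal parsing sketch under those assumptions; the example dict is copied from this log, while the function and variable names are illustrative only:

import re

# Matches records such as "step: 4/77, loss: 0.011534569784998894",
# including losses in scientific notation like "7.703698065597564e-05".
STEP_RE = re.compile(r"step: \d+/\d+, loss: ([0-9][0-9.eE+-]*)")

def epoch_mean_loss(log_text: str) -> float:
    """Mean of the logged step losses in one epoch's slice of the log;
    the "Loss: 0.015" summary line appears to be this value, rounded."""
    losses = [float(x) for x in STEP_RE.findall(log_text)]
    return sum(losses) / len(losses)

def combined_score(result: dict) -> float:
    """The "combined" field equals template F1 * slot F1."""
    return result["template"]["f1"] * result["slot"]["f1"]

# Re-deriving the epoch-13 dev score printed below:
dev_chinese = {
    "template": {"p": 1.0, "r": 0.5833333333333334, "f1": 0.7368421052631579},
    "slot": {"p": 0.5, "r": 0.03780718336483932, "f1": 0.07029876977152899},
}
print(combined_score(dev_chinese))  # ≈ 0.0517990935158634..., matching the logged value

The "Current best result" blocks also keep reporting epoch 2 even though several later epochs tie its dev "combined" score exactly, which suggests the saved best checkpoint is replaced only on a strict improvement of that dev score.
--------------------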
Loss: 0.015 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Chinese: {'template': {'p': 0.9305555555555556, 'r': 0.5275590551181102, 'f1': 0.6733668341708542}, 'slot': {'p': 0.5853658536585366, 'r': 0.020761245674740483, 'f1': 0.040100250626566414}, 'combined': 0.02700217881386884, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.5581395348837209, 'r': 0.020761245674740483, 'f1': 0.040033361134278564}, 'combined': 0.02668890742285237, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Russian: {'template': {'p': 0.9305555555555556, 'r': 0.5275590551181102, 'f1': 0.6733668341708542}, 'slot': {'p': 0.5581395348837209, 'r': 0.020761245674740483, 'f1': 0.040033361134278564}, 'combined': 0.026957137648207674, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 13} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 
'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 14 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:06:51.914982: step: 4/77, loss: 0.006818510126322508 2023-01-23 23:06:53.207387: step: 8/77, loss: 0.0006258913199417293 2023-01-23 23:06:54.484666: step: 12/77, loss: 0.0027833329513669014 2023-01-23 23:06:55.768990: step: 16/77, loss: 0.0074259317480027676 2023-01-23 23:06:56.997052: step: 20/77, loss: 0.0012211976572871208 2023-01-23 23:06:58.214142: step: 24/77, loss: 0.004635686054825783 2023-01-23 23:06:59.522227: step: 28/77, loss: 0.010179792530834675 2023-01-23 23:07:00.789911: step: 32/77, loss: 0.0019203039119020104 2023-01-23 23:07:02.014585: step: 36/77, loss: 0.005334170069545507 2023-01-23 23:07:03.277625: step: 40/77, loss: 0.013824529945850372 2023-01-23 23:07:04.545729: step: 44/77, loss: 0.004141363315284252 2023-01-23 23:07:05.869088: step: 48/77, loss: 4.5128668716643006e-05 2023-01-23 23:07:07.168571: step: 52/77, loss: 0.052282486110925674 2023-01-23 23:07:08.441407: step: 56/77, loss: 1.9176808564225212e-05 2023-01-23 23:07:09.673540: step: 60/77, loss: 0.020106812939047813 2023-01-23 23:07:10.905389: step: 64/77, loss: 0.00036743577220477164 2023-01-23 23:07:12.159446: step: 68/77, loss: 0.022626064717769623 2023-01-23 23:07:13.419828: step: 72/77, loss: 1.0763298632809892e-05 2023-01-23 23:07:14.671985: step: 76/77, loss: 0.0008758734329603612 2023-01-23 23:07:15.895383: step: 80/77, loss: 0.028549078851938248 2023-01-23 23:07:17.158256: step: 84/77, loss: 6.134075374575332e-05 2023-01-23 23:07:18.422164: step: 88/77, loss: 0.010253140702843666 2023-01-23 23:07:19.678661: step: 92/77, loss: 1.0152663890039548e-05 2023-01-23 23:07:20.983895: step: 96/77, loss: 0.009076571092009544 2023-01-23 23:07:22.272026: step: 100/77, loss: 0.011913309805095196 2023-01-23 23:07:23.525382: step: 104/77, loss: 0.004169612191617489 2023-01-23 23:07:24.833629: step: 108/77, loss: 0.0008429823210462928 2023-01-23 23:07:26.123026: step: 112/77, loss: 0.0046945675276219845 2023-01-23 23:07:27.389683: step: 116/77, loss: 0.08648164570331573 2023-01-23 23:07:28.666154: step: 120/77, loss: 0.022534571588039398 2023-01-23 23:07:29.925213: step: 124/77, loss: 0.006535988301038742 2023-01-23 23:07:31.229753: step: 128/77, loss: 0.00017252654652111232 2023-01-23 23:07:32.489865: step: 132/77, loss: 0.0002712097193580121 2023-01-23 23:07:33.740311: step: 136/77, loss: 0.006114703603088856 2023-01-23 23:07:35.034308: step: 140/77, loss: 0.00034197320928797126 2023-01-23 23:07:36.311664: step: 144/77, loss: 8.801784133538604e-05 2023-01-23 23:07:37.650081: step: 148/77, loss: 0.002975575625896454 2023-01-23 23:07:38.933430: step: 152/77, loss: 0.01886889897286892 2023-01-23 23:07:40.233886: step: 156/77, loss: 0.011135238222777843 2023-01-23 23:07:41.470113: step: 160/77, loss: 0.0024436095263808966 2023-01-23 23:07:42.730653: step: 164/77, loss: 0.0031774109229445457 2023-01-23 23:07:43.964138: step: 168/77, loss: 0.018518783152103424 2023-01-23 23:07:45.272618: step: 172/77, loss: 
0.0018485994078218937 2023-01-23 23:07:46.567847: step: 176/77, loss: 0.00010482324432814494 2023-01-23 23:07:47.877608: step: 180/77, loss: 0.0023094690404832363 2023-01-23 23:07:49.136972: step: 184/77, loss: 0.0215534046292305 2023-01-23 23:07:50.400649: step: 188/77, loss: 0.10720938444137573 2023-01-23 23:07:51.691495: step: 192/77, loss: 0.010321415960788727 2023-01-23 23:07:52.988264: step: 196/77, loss: 0.009301860816776752 2023-01-23 23:07:54.279712: step: 200/77, loss: 0.013922393321990967 2023-01-23 23:07:55.552596: step: 204/77, loss: 0.06962837278842926 2023-01-23 23:07:56.805892: step: 208/77, loss: 2.3917215003166348e-05 2023-01-23 23:07:58.062659: step: 212/77, loss: 0.0030422827694565058 2023-01-23 23:07:59.376525: step: 216/77, loss: 0.0008390427683480084 2023-01-23 23:08:00.666007: step: 220/77, loss: 0.000872556702233851 2023-01-23 23:08:01.916947: step: 224/77, loss: 0.002295607002452016 2023-01-23 23:08:03.161905: step: 228/77, loss: 0.00622314028441906 2023-01-23 23:08:04.423476: step: 232/77, loss: 0.020128877833485603 2023-01-23 23:08:05.740206: step: 236/77, loss: 0.019149256870150566 2023-01-23 23:08:07.107662: step: 240/77, loss: 0.0004515836189966649 2023-01-23 23:08:08.393813: step: 244/77, loss: 0.011811056174337864 2023-01-23 23:08:09.655014: step: 248/77, loss: 0.02650582231581211 2023-01-23 23:08:10.894101: step: 252/77, loss: 0.00040723313577473164 2023-01-23 23:08:12.178595: step: 256/77, loss: 0.0022488830145448446 2023-01-23 23:08:13.453727: step: 260/77, loss: 0.005055895075201988 2023-01-23 23:08:14.733490: step: 264/77, loss: 3.0174373932823073e-06 2023-01-23 23:08:16.027935: step: 268/77, loss: 0.0850355327129364 2023-01-23 23:08:17.254383: step: 272/77, loss: 0.005542317871004343 2023-01-23 23:08:18.543347: step: 276/77, loss: 0.0464678555727005 2023-01-23 23:08:19.868695: step: 280/77, loss: 0.0031086173839867115 2023-01-23 23:08:21.182679: step: 284/77, loss: 0.0017400861252099276 2023-01-23 23:08:22.471333: step: 288/77, loss: 0.003277476178482175 2023-01-23 23:08:23.751751: step: 292/77, loss: 0.0005285036750137806 2023-01-23 23:08:25.043651: step: 296/77, loss: 0.0001412014535162598 2023-01-23 23:08:26.383617: step: 300/77, loss: 0.029769254848361015 2023-01-23 23:08:27.675778: step: 304/77, loss: 0.016503766179084778 2023-01-23 23:08:29.027888: step: 308/77, loss: 0.002605894347652793 2023-01-23 23:08:30.348305: step: 312/77, loss: 0.09122447669506073 2023-01-23 23:08:31.607534: step: 316/77, loss: 0.0014124336885288358 2023-01-23 23:08:32.873549: step: 320/77, loss: 0.004125905688852072 2023-01-23 23:08:34.194190: step: 324/77, loss: 0.004153760150074959 2023-01-23 23:08:35.445546: step: 328/77, loss: 0.003013004781678319 2023-01-23 23:08:36.727725: step: 332/77, loss: 0.0016704755835235119 2023-01-23 23:08:38.008146: step: 336/77, loss: 0.008554182946681976 2023-01-23 23:08:39.272289: step: 340/77, loss: 0.005642848089337349 2023-01-23 23:08:40.559031: step: 344/77, loss: 0.026147441938519478 2023-01-23 23:08:41.883441: step: 348/77, loss: 0.0007405009819194674 2023-01-23 23:08:43.162698: step: 352/77, loss: 0.0007274311501532793 2023-01-23 23:08:44.403836: step: 356/77, loss: 0.002250226214528084 2023-01-23 23:08:45.707839: step: 360/77, loss: 0.007931672036647797 2023-01-23 23:08:46.980143: step: 364/77, loss: 0.005355477333068848 2023-01-23 23:08:48.266065: step: 368/77, loss: 0.0003642349038273096 2023-01-23 23:08:49.604497: step: 372/77, loss: 0.0006663693930022418 2023-01-23 23:08:50.878231: step: 376/77, loss: 0.05269888788461685 
2023-01-23 23:08:52.178341: step: 380/77, loss: 0.0001794708805391565 2023-01-23 23:08:53.537226: step: 384/77, loss: 0.04961037263274193 2023-01-23 23:08:54.837395: step: 388/77, loss: 0.0022222367115318775 ================================================== Loss: 0.012 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Chinese: {'template': {'p': 0.935064935064935, 'r': 0.5669291338582677, 'f1': 0.7058823529411765}, 'slot': {'p': 0.7037037037037037, 'r': 0.01643598615916955, 'f1': 0.032121724429416736}, 'combined': 0.022674158420764756, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Korean: {'template': {'p': 0.9358974358974359, 'r': 0.5748031496062992, 'f1': 0.7121951219512195}, 'slot': {'p': 0.6785714285714286, 'r': 0.01643598615916955, 'f1': 0.03209459459459459}, 'combined': 0.022857613711272245, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Russian: {'template': {'p': 0.9230769230769231, 'r': 0.5669291338582677, 'f1': 0.7024390243902439}, 'slot': {'p': 0.7407407407407407, 'r': 0.01730103806228374, 'f1': 0.03381234150464919}, 'combined': 0.02375110817887553, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 15 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:10:34.790437: step: 4/77, loss: 0.0004816804139409214 2023-01-23 23:10:36.060148: step: 8/77, loss: 1.6693866200512275e-05 2023-01-23 23:10:37.344592: step: 12/77, loss: 4.607753726304509e-05 2023-01-23 23:10:38.635632: step: 16/77, loss: 0.0003652075829450041 2023-01-23 23:10:39.904840: step: 20/77, loss: 0.0014403036329895258 2023-01-23 23:10:41.139743: step: 24/77, loss: 0.0024174025747925043 2023-01-23 23:10:42.439702: step: 28/77, loss: 1.7359528783345013e-06 2023-01-23 23:10:43.784063: step: 32/77, loss: 0.04290907457470894 2023-01-23 23:10:45.050841: step: 36/77, loss: 0.0003589342813938856 2023-01-23 23:10:46.287064: step: 40/77, loss: 0.012305357493460178 2023-01-23 23:10:47.581571: step: 44/77, loss: 0.019339703023433685 2023-01-23 23:10:48.846235: step: 48/77, loss: 0.003062969772145152 2023-01-23 23:10:50.096470: step: 52/77, loss: 5.2768882596865296e-05 2023-01-23 23:10:51.348167: step: 56/77, loss: 0.006697559729218483 2023-01-23 23:10:52.609206: step: 60/77, loss: 0.0011638242285698652 2023-01-23 23:10:53.903843: step: 64/77, loss: 0.01967165619134903 2023-01-23 23:10:55.104703: step: 68/77, loss: 0.004847807809710503 2023-01-23 23:10:56.360776: step: 72/77, loss: 0.000611055816989392 2023-01-23 23:10:57.683561: step: 76/77, loss: 0.0048630512319505215 2023-01-23 23:10:58.977155: step: 80/77, loss: 0.054831236600875854 2023-01-23 23:11:00.266169: step: 84/77, loss: 0.01335857156664133 2023-01-23 23:11:01.575302: step: 88/77, loss: 0.020420288667082787 2023-01-23 23:11:02.840303: step: 92/77, loss: 0.0005267034866847098 2023-01-23 23:11:04.142045: step: 96/77, loss: 0.009028802625834942 2023-01-23 23:11:05.391694: step: 100/77, loss: 0.013945749960839748 2023-01-23 23:11:06.628112: step: 104/77, loss: 0.04236004129052162 2023-01-23 23:11:07.963451: step: 108/77, loss: 0.00033831584732979536 2023-01-23 23:11:09.258160: step: 112/77, loss: 5.3080308134667575e-05 2023-01-23 23:11:10.489179: step: 116/77, loss: 0.07516567409038544 2023-01-23 23:11:11.744780: step: 120/77, loss: 0.015899505466222763 2023-01-23 23:11:13.010893: step: 124/77, loss: 0.0003939236339647323 2023-01-23 23:11:14.306261: step: 128/77, loss: 0.004468400496989489 2023-01-23 23:11:15.605209: step: 132/77, loss: 0.005055863410234451 2023-01-23 23:11:16.932624: step: 136/77, loss: 0.0025805742479860783 2023-01-23 23:11:18.229745: step: 140/77, loss: 0.0005273033166304231 2023-01-23 23:11:19.549203: step: 144/77, loss: 0.060450442135334015 2023-01-23 23:11:20.821347: step: 148/77, loss: 0.018075956031680107 2023-01-23 23:11:22.120432: step: 152/77, loss: 0.06206171587109566 2023-01-23 23:11:23.439942: step: 156/77, loss: 0.001956725725904107 2023-01-23 23:11:24.746239: step: 160/77, loss: 0.0050781648606061935 2023-01-23 
23:11:26.008712: step: 164/77, loss: 0.004459173884242773 2023-01-23 23:11:27.290660: step: 168/77, loss: 0.04625125601887703 2023-01-23 23:11:28.592425: step: 172/77, loss: 0.0015759647358208895 2023-01-23 23:11:29.862920: step: 176/77, loss: 0.033126529306173325 2023-01-23 23:11:31.169337: step: 180/77, loss: 7.026261300779879e-05 2023-01-23 23:11:32.448614: step: 184/77, loss: 0.0015952292596921325 2023-01-23 23:11:33.723677: step: 188/77, loss: 0.03751169145107269 2023-01-23 23:11:34.992043: step: 192/77, loss: 0.009760575369000435 2023-01-23 23:11:36.308343: step: 196/77, loss: 0.02395324595272541 2023-01-23 23:11:37.589457: step: 200/77, loss: 0.07830063998699188 2023-01-23 23:11:38.907416: step: 204/77, loss: 6.735025090165436e-05 2023-01-23 23:11:40.191856: step: 208/77, loss: 0.002208677353337407 2023-01-23 23:11:41.480350: step: 212/77, loss: 0.006165114231407642 2023-01-23 23:11:42.824446: step: 216/77, loss: 0.005174754187464714 2023-01-23 23:11:44.159056: step: 220/77, loss: 0.0012870485661551356 2023-01-23 23:11:45.441627: step: 224/77, loss: 0.012494869530200958 2023-01-23 23:11:46.714549: step: 228/77, loss: 0.014958437532186508 2023-01-23 23:11:47.974856: step: 232/77, loss: 0.0002445927239023149 2023-01-23 23:11:49.317523: step: 236/77, loss: 0.00011449036537669599 2023-01-23 23:11:50.604060: step: 240/77, loss: 0.0002988382475450635 2023-01-23 23:11:51.909233: step: 244/77, loss: 0.005984320305287838 2023-01-23 23:11:53.200813: step: 248/77, loss: 6.18845151620917e-05 2023-01-23 23:11:54.487774: step: 252/77, loss: 0.0006847563199698925 2023-01-23 23:11:55.793615: step: 256/77, loss: 0.004405403509736061 2023-01-23 23:11:57.083045: step: 260/77, loss: 0.00732279010117054 2023-01-23 23:11:58.382572: step: 264/77, loss: 0.05142771080136299 2023-01-23 23:11:59.691107: step: 268/77, loss: 6.224772369023412e-05 2023-01-23 23:12:00.974312: step: 272/77, loss: 0.0019245930016040802 2023-01-23 23:12:02.259076: step: 276/77, loss: 0.0025355899706482887 2023-01-23 23:12:03.600238: step: 280/77, loss: 0.006086964160203934 2023-01-23 23:12:04.923039: step: 284/77, loss: 0.0012631918070837855 2023-01-23 23:12:06.226323: step: 288/77, loss: 0.0001228465116582811 2023-01-23 23:12:07.547919: step: 292/77, loss: 0.00019296916434541345 2023-01-23 23:12:08.848424: step: 296/77, loss: 0.002572681289166212 2023-01-23 23:12:10.150183: step: 300/77, loss: 1.442090706404997e-05 2023-01-23 23:12:11.451826: step: 304/77, loss: 3.7758538837806555e-06 2023-01-23 23:12:12.736005: step: 308/77, loss: 0.019748851656913757 2023-01-23 23:12:13.986729: step: 312/77, loss: 0.00014960896805860102 2023-01-23 23:12:15.296515: step: 316/77, loss: 0.011053039692342281 2023-01-23 23:12:16.601289: step: 320/77, loss: 2.8220272270118585e-06 2023-01-23 23:12:17.858947: step: 324/77, loss: 0.0003843028098344803 2023-01-23 23:12:19.139693: step: 328/77, loss: 0.01987829990684986 2023-01-23 23:12:20.416750: step: 332/77, loss: 0.09208142012357712 2023-01-23 23:12:21.718828: step: 336/77, loss: 0.003218474332243204 2023-01-23 23:12:23.023441: step: 340/77, loss: 0.0072304122149944305 2023-01-23 23:12:24.317448: step: 344/77, loss: 0.0015550671378150582 2023-01-23 23:12:25.598496: step: 348/77, loss: 0.00026606611208990216 2023-01-23 23:12:26.903037: step: 352/77, loss: 2.4905082682380453e-05 2023-01-23 23:12:28.208738: step: 356/77, loss: 0.05696500837802887 2023-01-23 23:12:29.492964: step: 360/77, loss: 4.337245991337113e-05 2023-01-23 23:12:30.745945: step: 364/77, loss: 0.015604798682034016 2023-01-23 
23:12:32.085907: step: 368/77, loss: 4.735090897156624e-06 2023-01-23 23:12:33.365219: step: 372/77, loss: 5.179361323826015e-06 2023-01-23 23:12:34.693243: step: 376/77, loss: 0.012082105502486229 2023-01-23 23:12:35.985708: step: 380/77, loss: 0.00010182980622630566 2023-01-23 23:12:37.226681: step: 384/77, loss: 8.844018157105893e-05 2023-01-23 23:12:38.488006: step: 388/77, loss: 0.05117665231227875 ================================================== Loss: 0.012 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 15} Test Chinese: {'template': {'p': 0.9324324324324325, 'r': 0.5433070866141733, 'f1': 0.6865671641791046}, 'slot': {'p': 0.5675675675675675, 'r': 0.018166089965397925, 'f1': 0.03520536462699078}, 'combined': 0.024170847355844422, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 15} Test Korean: {'template': {'p': 0.918918918918919, 'r': 0.5354330708661418, 'f1': 0.6766169154228856}, 'slot': {'p': 0.5405405405405406, 'r': 0.01730103806228374, 'f1': 0.03352891869237217}, 'combined': 0.022686233543097588, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 15} Test Russian: {'template': {'p': 0.9315068493150684, 'r': 0.5354330708661418, 'f1': 0.6799999999999999}, 'slot': {'p': 0.5675675675675675, 'r': 0.018166089965397925, 'f1': 0.03520536462699078}, 'combined': 0.02393964794635373, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 
0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 16 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:14:18.775343: step: 4/77, loss: 4.861850356974173e-06 2023-01-23 23:14:20.066922: step: 8/77, loss: 1.5866782632656395e-05 2023-01-23 23:14:21.374516: step: 12/77, loss: 0.00033481360878795385 2023-01-23 23:14:22.672601: step: 16/77, loss: 7.987874596437905e-06 2023-01-23 23:14:23.915627: step: 20/77, loss: 0.0006085088243708014 2023-01-23 23:14:25.190129: step: 24/77, loss: 0.0014214256079867482 2023-01-23 23:14:26.431819: step: 28/77, loss: 0.033199504017829895 2023-01-23 23:14:27.703262: step: 32/77, loss: 0.002857948187738657 2023-01-23 23:14:28.989644: step: 36/77, loss: 0.002551297191530466 2023-01-23 23:14:30.234796: step: 40/77, loss: 0.0009598369360901415 2023-01-23 23:14:31.521677: step: 44/77, loss: 3.17394039939245e-07 2023-01-23 23:14:32.741667: step: 48/77, loss: 0.00013348981156013906 2023-01-23 23:14:34.053110: step: 52/77, loss: 0.00243179639801383 2023-01-23 23:14:35.303582: step: 56/77, loss: 0.00048356899060308933 2023-01-23 23:14:36.589653: step: 60/77, loss: 0.036274898797273636 2023-01-23 23:14:37.861956: step: 64/77, loss: 0.002666803542524576 2023-01-23 23:14:39.139942: step: 68/77, loss: 0.0021867984905838966 2023-01-23 23:14:40.409723: step: 72/77, loss: 0.055631570518016815 2023-01-23 23:14:41.701292: step: 76/77, loss: 0.04630818963050842 2023-01-23 23:14:42.955423: step: 80/77, loss: 0.0002477295638527721 2023-01-23 23:14:44.224879: step: 84/77, loss: 0.023699330165982246 2023-01-23 23:14:45.492101: step: 88/77, loss: 0.0026453514583408833 2023-01-23 23:14:46.774314: step: 92/77, loss: 0.014910968951880932 2023-01-23 23:14:48.024224: step: 96/77, loss: 0.0005018580122850835 2023-01-23 23:14:49.275687: step: 100/77, loss: 0.0011478365631774068 2023-01-23 23:14:50.567450: step: 104/77, loss: 0.0002814961189869791 2023-01-23 23:14:51.788026: step: 108/77, loss: 0.002626886125653982 2023-01-23 23:14:53.038258: step: 112/77, loss: 5.4958374676061794e-05 2023-01-23 23:14:54.327883: step: 116/77, loss: 4.4051979784853756e-05 2023-01-23 23:14:55.612082: step: 120/77, loss: 0.017519483342766762 2023-01-23 23:14:56.902812: step: 124/77, loss: 0.003170792944729328 2023-01-23 23:14:58.152942: step: 128/77, loss: 0.0010376194259151816 2023-01-23 23:14:59.445797: step: 132/77, loss: 1.1920615179406013e-06 2023-01-23 23:15:00.726332: step: 136/77, loss: 0.00045014932402409613 2023-01-23 23:15:02.029444: step: 140/77, loss: 0.0001678488333709538 2023-01-23 23:15:03.307195: 
step: 144/77, loss: 0.006620452739298344 2023-01-23 23:15:04.586126: step: 148/77, loss: 3.382553757091955e-07 2023-01-23 23:15:05.870919: step: 152/77, loss: 0.0026630829088389874 2023-01-23 23:15:07.210391: step: 156/77, loss: 2.1847074094694108e-05 2023-01-23 23:15:08.502646: step: 160/77, loss: 0.023481661453843117 2023-01-23 23:15:09.764231: step: 164/77, loss: 0.001555607421323657 2023-01-23 23:15:11.016762: step: 168/77, loss: 0.007830877788364887 2023-01-23 23:15:12.264178: step: 172/77, loss: 0.0030038796830922365 2023-01-23 23:15:13.499907: step: 176/77, loss: 0.00042911790660582483 2023-01-23 23:15:14.758133: step: 180/77, loss: 0.008290370926260948 2023-01-23 23:15:16.075581: step: 184/77, loss: 0.016199413686990738 2023-01-23 23:15:17.364487: step: 188/77, loss: 0.0002577801060397178 2023-01-23 23:15:18.667763: step: 192/77, loss: 0.00019381032325327396 2023-01-23 23:15:19.972100: step: 196/77, loss: 0.00016714620869606733 2023-01-23 23:15:21.240254: step: 200/77, loss: 0.00012267788406461477 2023-01-23 23:15:22.516347: step: 204/77, loss: 0.009114248678088188 2023-01-23 23:15:23.787057: step: 208/77, loss: 0.03911638632416725 2023-01-23 23:15:25.037489: step: 212/77, loss: 1.8671898942557164e-05 2023-01-23 23:15:26.273685: step: 216/77, loss: 0.001954711740836501 2023-01-23 23:15:27.574803: step: 220/77, loss: 0.007040996104478836 2023-01-23 23:15:28.847568: step: 224/77, loss: 1.5829691619728692e-05 2023-01-23 23:15:30.118147: step: 228/77, loss: 0.0002243506460217759 2023-01-23 23:15:31.374621: step: 232/77, loss: 0.00683977035805583 2023-01-23 23:15:32.655813: step: 236/77, loss: 2.2761918444302864e-05 2023-01-23 23:15:33.980577: step: 240/77, loss: 0.019565310329198837 2023-01-23 23:15:35.300594: step: 244/77, loss: 9.414236956217792e-06 2023-01-23 23:15:36.595604: step: 248/77, loss: 1.728589450067375e-05 2023-01-23 23:15:37.871913: step: 252/77, loss: 0.0035243923775851727 2023-01-23 23:15:39.130512: step: 256/77, loss: 0.0015222537331283092 2023-01-23 23:15:40.407887: step: 260/77, loss: 4.505701144807972e-05 2023-01-23 23:15:41.678919: step: 264/77, loss: 0.0015098822768777609 2023-01-23 23:15:42.976277: step: 268/77, loss: 0.01089445035904646 2023-01-23 23:15:44.269844: step: 272/77, loss: 0.03385207802057266 2023-01-23 23:15:45.546439: step: 276/77, loss: 0.00025587028358131647 2023-01-23 23:15:46.830081: step: 280/77, loss: 3.722103065229021e-05 2023-01-23 23:15:48.137882: step: 284/77, loss: 0.002465657889842987 2023-01-23 23:15:49.386115: step: 288/77, loss: 0.01795008033514023 2023-01-23 23:15:50.710706: step: 292/77, loss: 1.3884822692489251e-05 2023-01-23 23:15:52.007443: step: 296/77, loss: 0.007718597073107958 2023-01-23 23:15:53.276421: step: 300/77, loss: 0.028821276500821114 2023-01-23 23:15:54.575653: step: 304/77, loss: 0.06679520010948181 2023-01-23 23:15:55.878790: step: 308/77, loss: 0.016347669064998627 2023-01-23 23:15:57.124568: step: 312/77, loss: 5.412711834651418e-05 2023-01-23 23:15:58.382586: step: 316/77, loss: 0.009288772009313107 2023-01-23 23:15:59.702202: step: 320/77, loss: 0.00021472680964507163 2023-01-23 23:16:01.025039: step: 324/77, loss: 8.622468885732815e-05 2023-01-23 23:16:02.328767: step: 328/77, loss: 0.012828285805881023 2023-01-23 23:16:03.687652: step: 332/77, loss: 0.013079500757157803 2023-01-23 23:16:04.977139: step: 336/77, loss: 0.00018611534324008971 2023-01-23 23:16:06.242396: step: 340/77, loss: 0.00037904319469816983 2023-01-23 23:16:07.512944: step: 344/77, loss: 0.00024278687487822026 2023-01-23 23:16:08.736206: 
step: 348/77, loss: 0.03581083565950394 2023-01-23 23:16:10.020444: step: 352/77, loss: 1.897270885820035e-05 2023-01-23 23:16:11.325175: step: 356/77, loss: 0.00048672978300601244 2023-01-23 23:16:12.600736: step: 360/77, loss: 0.0008217963622882962 2023-01-23 23:16:13.870830: step: 364/77, loss: 0.0005745338276028633 2023-01-23 23:16:15.126479: step: 368/77, loss: 0.002440313808619976 2023-01-23 23:16:16.441194: step: 372/77, loss: 0.0001457735343137756 2023-01-23 23:16:17.731184: step: 376/77, loss: 0.033896248787641525 2023-01-23 23:16:19.015639: step: 380/77, loss: 0.016800181940197945 2023-01-23 23:16:20.349964: step: 384/77, loss: 0.02510041743516922 2023-01-23 23:16:21.668703: step: 388/77, loss: 0.036930397152900696 ================================================== Loss: 0.008 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Chinese: {'template': {'p': 0.9358974358974359, 'r': 0.5748031496062992, 'f1': 0.7121951219512195}, 'slot': {'p': 0.6060606060606061, 'r': 0.01730103806228374, 'f1': 0.0336417157275021}, 'combined': 0.02395946583519662, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Korean: {'template': {'p': 0.9240506329113924, 'r': 0.5748031496062992, 'f1': 0.7087378640776699}, 'slot': {'p': 0.6176470588235294, 'r': 0.018166089965397925, 'f1': 0.03529411764705882}, 'combined': 0.025014277555682467, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Russian: {'template': {'p': 0.9358974358974359, 'r': 0.5748031496062992, 'f1': 0.7121951219512195}, 'slot': {'p': 0.6363636363636364, 'r': 0.018166089965397925, 'f1': 0.03532380151387721}, 'combined': 0.02515743912695645, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 
0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 17 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:18:01.638190: step: 4/77, loss: 0.000641880847979337 2023-01-23 23:18:02.941376: step: 8/77, loss: 0.002244143048301339 2023-01-23 23:18:04.254950: step: 12/77, loss: 0.021898828446865082 2023-01-23 23:18:05.524693: step: 16/77, loss: 0.0001363503688480705 2023-01-23 23:18:06.774623: step: 20/77, loss: 8.214037370635197e-05 2023-01-23 23:18:08.070277: step: 24/77, loss: 1.8887612895923667e-05 2023-01-23 23:18:09.343296: step: 28/77, loss: 0.0013867117231711745 2023-01-23 23:18:10.676160: step: 32/77, loss: 0.0017078772652894258 2023-01-23 23:18:11.951990: step: 36/77, loss: 3.950330938096158e-05 2023-01-23 23:18:13.238425: step: 40/77, loss: 0.0019030816620215774 2023-01-23 23:18:14.538629: step: 44/77, loss: 8.705825894139707e-05 2023-01-23 23:18:15.828740: step: 48/77, loss: 0.00028506916714832187 2023-01-23 23:18:17.109801: step: 52/77, loss: 0.005858226679265499 2023-01-23 23:18:18.367614: step: 56/77, loss: 0.0007443473441526294 2023-01-23 23:18:19.636852: step: 60/77, loss: 0.00011790436110459268 2023-01-23 23:18:20.891364: step: 64/77, loss: 1.5535473721683957e-05 2023-01-23 23:18:22.176978: step: 68/77, loss: 0.0008613172685727477 2023-01-23 23:18:23.476166: step: 72/77, loss: 0.005559463519603014 2023-01-23 23:18:24.754452: step: 76/77, loss: 0.04706824570894241 2023-01-23 23:18:26.047084: step: 80/77, loss: 6.35113101452589e-05 2023-01-23 23:18:27.299879: step: 84/77, loss: 0.00010967568960040808 2023-01-23 23:18:28.603514: step: 88/77, loss: 0.0002559815184213221 2023-01-23 23:18:29.882668: step: 92/77, loss: 0.031311001628637314 2023-01-23 23:18:31.146085: step: 96/77, loss: 0.055141374468803406 2023-01-23 23:18:32.402242: step: 100/77, loss: 0.016477080062031746 2023-01-23 23:18:33.693100: step: 104/77, loss: 0.0002590902440715581 2023-01-23 23:18:34.985602: step: 108/77, loss: 6.216138717718422e-05 2023-01-23 23:18:36.288126: step: 112/77, loss: 0.0004739709838759154 2023-01-23 23:18:37.577255: step: 116/77, loss: 0.008552845567464828 2023-01-23 23:18:38.854636: step: 120/77, loss: 0.001149423187598586 2023-01-23 23:18:40.154232: step: 124/77, loss: 0.01125161163508892 2023-01-23 23:18:41.434667: step: 128/77, loss: 0.00855990033596754 2023-01-23 23:18:42.658948: step: 132/77, loss: 
0.005925518926233053 2023-01-23 23:18:43.920061: step: 136/77, loss: 0.0032444128300994635 2023-01-23 23:18:45.220366: step: 140/77, loss: 0.0002631635288707912 2023-01-23 23:18:46.477747: step: 144/77, loss: 0.07887933403253555 2023-01-23 23:18:47.751961: step: 148/77, loss: 0.0003720789682120085 2023-01-23 23:18:49.008798: step: 152/77, loss: 0.01905059814453125 2023-01-23 23:18:50.335574: step: 156/77, loss: 0.008945231325924397 2023-01-23 23:18:51.629447: step: 160/77, loss: 0.0002890854375436902 2023-01-23 23:18:52.957570: step: 164/77, loss: 0.019471365958452225 2023-01-23 23:18:54.260285: step: 168/77, loss: 0.0015825422015041113 2023-01-23 23:18:55.543260: step: 172/77, loss: 0.008039366453886032 2023-01-23 23:18:56.812328: step: 176/77, loss: 0.0008831970044411719 2023-01-23 23:18:58.070022: step: 180/77, loss: 0.014655493199825287 2023-01-23 23:18:59.356828: step: 184/77, loss: 0.04497211426496506 2023-01-23 23:19:00.624514: step: 188/77, loss: 0.002673403127118945 2023-01-23 23:19:01.935721: step: 192/77, loss: 0.0011580471182242036 2023-01-23 23:19:03.227767: step: 196/77, loss: 1.5838615581742488e-05 2023-01-23 23:19:04.511204: step: 200/77, loss: 4.164213896729052e-05 2023-01-23 23:19:05.803254: step: 204/77, loss: 1.3706779100175481e-05 2023-01-23 23:19:07.106656: step: 208/77, loss: 9.41771941143088e-06 2023-01-23 23:19:08.414865: step: 212/77, loss: 0.00037044179043732584 2023-01-23 23:19:09.670639: step: 216/77, loss: 0.0010035919258370996 2023-01-23 23:19:10.936308: step: 220/77, loss: 2.4068180209724233e-05 2023-01-23 23:19:12.235506: step: 224/77, loss: 0.0002719534677453339 2023-01-23 23:19:13.514771: step: 228/77, loss: 0.0005661757895722985 2023-01-23 23:19:14.788258: step: 232/77, loss: 7.066810212563723e-05 2023-01-23 23:19:16.101197: step: 236/77, loss: 0.00011910132161574438 2023-01-23 23:19:17.452928: step: 240/77, loss: 0.028321973979473114 2023-01-23 23:19:18.747166: step: 244/77, loss: 0.0003590689739212394 2023-01-23 23:19:20.048409: step: 248/77, loss: 3.452347664278932e-05 2023-01-23 23:19:21.356467: step: 252/77, loss: 0.0032013666350394487 2023-01-23 23:19:22.579279: step: 256/77, loss: 9.446490730624646e-05 2023-01-23 23:19:23.873188: step: 260/77, loss: 0.028026148676872253 2023-01-23 23:19:25.117856: step: 264/77, loss: 6.273339749895968e-07 2023-01-23 23:19:26.438582: step: 268/77, loss: 0.002552525606006384 2023-01-23 23:19:27.758240: step: 272/77, loss: 0.00981560256332159 2023-01-23 23:19:29.047608: step: 276/77, loss: 3.770161856664345e-05 2023-01-23 23:19:30.295363: step: 280/77, loss: 0.0011477674124762416 2023-01-23 23:19:31.573488: step: 284/77, loss: 0.027259770780801773 2023-01-23 23:19:32.888576: step: 288/77, loss: 0.03577423840761185 2023-01-23 23:19:34.204958: step: 292/77, loss: 0.12435278296470642 2023-01-23 23:19:35.475905: step: 296/77, loss: 5.416186922957422e-06 2023-01-23 23:19:36.798437: step: 300/77, loss: 1.01263740361901e-05 2023-01-23 23:19:38.087960: step: 304/77, loss: 0.026366937905550003 2023-01-23 23:19:39.372412: step: 308/77, loss: 0.0008996648248285055 2023-01-23 23:19:40.665608: step: 312/77, loss: 4.338581675256137e-06 2023-01-23 23:19:41.983460: step: 316/77, loss: 0.00284166494384408 2023-01-23 23:19:43.283049: step: 320/77, loss: 0.0002525055897422135 2023-01-23 23:19:44.534379: step: 324/77, loss: 0.00012097764556529 2023-01-23 23:19:45.815221: step: 328/77, loss: 0.02872185781598091 2023-01-23 23:19:47.059594: step: 332/77, loss: 3.208328416803852e-05 2023-01-23 23:19:48.356668: step: 336/77, loss: 
0.019269438460469246 2023-01-23 23:19:49.656611: step: 340/77, loss: 0.0260951928794384 2023-01-23 23:19:50.905818: step: 344/77, loss: 0.004579652100801468 2023-01-23 23:19:52.222170: step: 348/77, loss: 0.0003985298390034586 2023-01-23 23:19:53.564798: step: 352/77, loss: 0.009386884048581123 2023-01-23 23:19:54.895774: step: 356/77, loss: 2.577963641670067e-05 2023-01-23 23:19:56.181438: step: 360/77, loss: 0.00037908164085820317 2023-01-23 23:19:57.471407: step: 364/77, loss: 0.0006229857681319118 2023-01-23 23:19:58.797255: step: 368/77, loss: 0.0008804745739325881 2023-01-23 23:20:00.140322: step: 372/77, loss: 0.005055315792560577 2023-01-23 23:20:01.421116: step: 376/77, loss: 0.00023594040249008685 2023-01-23 23:20:02.706473: step: 380/77, loss: 5.790413706563413e-05 2023-01-23 23:20:03.982415: step: 384/77, loss: 0.02676387131214142 2023-01-23 23:20:05.253705: step: 388/77, loss: 9.52510345086921e-06 ================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Chinese: {'template': {'p': 0.9466666666666667, 'r': 0.5590551181102362, 'f1': 0.7029702970297029}, 'slot': {'p': 0.5869565217391305, 'r': 0.023356401384083045, 'f1': 0.04492512479201331}, 'combined': 0.03158102831913807, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Korean: {'template': {'p': 0.9333333333333333, 'r': 0.5511811023622047, 'f1': 0.693069306930693}, 'slot': {'p': 0.5652173913043478, 'r': 0.02249134948096886, 'f1': 0.04326123128119801}, 'combined': 0.02998303158102832, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Russian: {'template': {'p': 0.935064935064935, 'r': 0.5669291338582677, 'f1': 0.7058823529411765}, 'slot': {'p': 0.574468085106383, 'r': 0.023356401384083045, 'f1': 0.044887780548628436}, 'combined': 0.03168549215197301, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 
0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 18 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:21:45.451068: step: 4/77, loss: 0.0002562256995588541 2023-01-23 23:21:46.731125: step: 8/77, loss: 0.005105638410896063 2023-01-23 23:21:47.973670: step: 12/77, loss: 0.021551361307501793 2023-01-23 23:21:49.260861: step: 16/77, loss: 0.0009725134586915374 2023-01-23 23:21:50.512985: step: 20/77, loss: 0.00022831915703136474 2023-01-23 23:21:51.839003: step: 24/77, loss: 0.0030773894395679235 2023-01-23 23:21:53.103651: step: 28/77, loss: 0.014899294823408127 2023-01-23 23:21:54.376756: step: 32/77, loss: 0.00023760151816532016 2023-01-23 23:21:55.666105: step: 36/77, loss: 0.001946118427440524 2023-01-23 23:21:56.966039: step: 40/77, loss: 0.03807735815644264 2023-01-23 23:21:58.225939: step: 44/77, loss: 0.0009200061904266477 2023-01-23 23:21:59.490311: step: 48/77, loss: 1.4559274859493598e-05 2023-01-23 23:22:00.747844: step: 52/77, loss: 0.03053630143404007 2023-01-23 23:22:02.016726: step: 56/77, loss: 0.013728977181017399 2023-01-23 23:22:03.323321: step: 60/77, loss: 9.248963033314794e-05 2023-01-23 23:22:04.594369: step: 64/77, loss: 0.00022051921405363828 2023-01-23 23:22:05.880931: step: 68/77, loss: 0.00015630066627636552 2023-01-23 23:22:07.134506: step: 72/77, loss: 0.005821420811116695 2023-01-23 23:22:08.410043: step: 76/77, loss: 0.0047253817319869995 2023-01-23 23:22:09.700496: step: 80/77, loss: 0.0034232642501592636 2023-01-23 23:22:10.985806: step: 84/77, loss: 0.010041794739663601 2023-01-23 23:22:12.250950: step: 88/77, loss: 0.001764630083926022 2023-01-23 23:22:13.528873: step: 92/77, loss: 0.0006076145800761878 2023-01-23 23:22:14.840062: step: 96/77, loss: 0.00011601823644014075 2023-01-23 23:22:16.104455: step: 100/77, loss: 0.019558578729629517 2023-01-23 23:22:17.431294: step: 104/77, loss: 0.011507065035402775 2023-01-23 23:22:18.698398: step: 108/77, loss: 7.664941222174093e-06 2023-01-23 23:22:19.958705: step: 112/77, loss: 0.0002507162862457335 2023-01-23 23:22:21.228427: step: 116/77, loss: 
0.006137733347713947 2023-01-23 23:22:22.454019: step: 120/77, loss: 0.032717470079660416 2023-01-23 23:22:23.698568: step: 124/77, loss: 0.01983604021370411 2023-01-23 23:22:25.000344: step: 128/77, loss: 4.210448241792619e-05 2023-01-23 23:22:26.281066: step: 132/77, loss: 0.011637914925813675 2023-01-23 23:22:27.569159: step: 136/77, loss: 1.722968227113597e-05 2023-01-23 23:22:28.875871: step: 140/77, loss: 1.2233510915393708e-06 2023-01-23 23:22:30.156327: step: 144/77, loss: 0.002384501276537776 2023-01-23 23:22:31.410150: step: 148/77, loss: 0.00037618394708260894 2023-01-23 23:22:32.645781: step: 152/77, loss: 0.0022240763064473867 2023-01-23 23:22:33.892380: step: 156/77, loss: 0.003713844809681177 2023-01-23 23:22:35.204784: step: 160/77, loss: 0.008770265616476536 2023-01-23 23:22:36.515035: step: 164/77, loss: 0.0006316181388683617 2023-01-23 23:22:37.796722: step: 168/77, loss: 3.712902980623767e-05 2023-01-23 23:22:39.116411: step: 172/77, loss: 0.05797393247485161 2023-01-23 23:22:40.387574: step: 176/77, loss: 8.296656596940011e-05 2023-01-23 23:22:41.676974: step: 180/77, loss: 0.007537681609392166 2023-01-23 23:22:42.920155: step: 184/77, loss: 0.023705052211880684 2023-01-23 23:22:44.177292: step: 188/77, loss: 2.0122100977459922e-05 2023-01-23 23:22:45.441355: step: 192/77, loss: 0.00016706100723240525 2023-01-23 23:22:46.716675: step: 196/77, loss: 0.00039428245509043336 2023-01-23 23:22:47.974124: step: 200/77, loss: 0.0008982608560472727 2023-01-23 23:22:49.316321: step: 204/77, loss: 0.007041514851152897 2023-01-23 23:22:50.571808: step: 208/77, loss: 0.00011690274550346658 2023-01-23 23:22:51.843719: step: 212/77, loss: 0.0006392638315446675 2023-01-23 23:22:53.153427: step: 216/77, loss: 0.003344218945130706 2023-01-23 23:22:54.483968: step: 220/77, loss: 0.0005616850685328245 2023-01-23 23:22:55.770952: step: 224/77, loss: 0.00034996814792975783 2023-01-23 23:22:57.067891: step: 228/77, loss: 3.20040180668002e-06 2023-01-23 23:22:58.325951: step: 232/77, loss: 0.005285785999149084 2023-01-23 23:22:59.607113: step: 236/77, loss: 0.0009092001710087061 2023-01-23 23:23:00.878047: step: 240/77, loss: 0.00760983070358634 2023-01-23 23:23:02.137731: step: 244/77, loss: 0.008531717583537102 2023-01-23 23:23:03.428209: step: 248/77, loss: 0.0012734340270981193 2023-01-23 23:23:04.727899: step: 252/77, loss: 0.0020677277352660894 2023-01-23 23:23:05.988871: step: 256/77, loss: 1.4285917131928727e-05 2023-01-23 23:23:07.282806: step: 260/77, loss: 3.830181958619505e-05 2023-01-23 23:23:08.575938: step: 264/77, loss: 0.007554773241281509 2023-01-23 23:23:09.876645: step: 268/77, loss: 1.0465077139087953e-05 2023-01-23 23:23:11.148025: step: 272/77, loss: 0.0043717920780181885 2023-01-23 23:23:12.431642: step: 276/77, loss: 0.006848793011158705 2023-01-23 23:23:13.740882: step: 280/77, loss: 0.011280843056738377 2023-01-23 23:23:15.011145: step: 284/77, loss: 0.000846183393150568 2023-01-23 23:23:16.272442: step: 288/77, loss: 0.03682919219136238 2023-01-23 23:23:17.577128: step: 292/77, loss: 0.02590131014585495 2023-01-23 23:23:18.869170: step: 296/77, loss: 0.0019165530102327466 2023-01-23 23:23:20.201173: step: 300/77, loss: 0.0008535957313142717 2023-01-23 23:23:21.474330: step: 304/77, loss: 0.0002510050544515252 2023-01-23 23:23:22.763866: step: 308/77, loss: 7.329511572606862e-05 2023-01-23 23:23:24.015239: step: 312/77, loss: 0.01697651669383049 2023-01-23 23:23:25.286250: step: 316/77, loss: 5.466431957756868e-06 2023-01-23 23:23:26.562366: step: 320/77, loss: 
4.962068373970396e-07 2023-01-23 23:23:27.839131: step: 324/77, loss: 3.868531348416582e-05 2023-01-23 23:23:29.155065: step: 328/77, loss: 0.0004085543332621455 2023-01-23 23:23:30.401796: step: 332/77, loss: 4.1887658881023526e-05 2023-01-23 23:23:31.728957: step: 336/77, loss: 0.0001989584561670199 2023-01-23 23:23:33.029221: step: 340/77, loss: 4.200351668259827e-06 2023-01-23 23:23:34.359748: step: 344/77, loss: 0.0006397374672815204 2023-01-23 23:23:35.629639: step: 348/77, loss: 0.013820907101035118 2023-01-23 23:23:36.961380: step: 352/77, loss: 0.009886541403830051 2023-01-23 23:23:38.269489: step: 356/77, loss: 5.0389326133881696e-06 2023-01-23 23:23:39.570348: step: 360/77, loss: 1.8894158984039677e-06 2023-01-23 23:23:40.822322: step: 364/77, loss: 0.004064930137246847 2023-01-23 23:23:42.126359: step: 368/77, loss: 0.00628216378390789 2023-01-23 23:23:43.410007: step: 372/77, loss: 7.403906784020364e-05 2023-01-23 23:23:44.690039: step: 376/77, loss: 9.216591570293531e-05 2023-01-23 23:23:45.933861: step: 380/77, loss: 0.002427300438284874 2023-01-23 23:23:47.261995: step: 384/77, loss: 0.019096795469522476 2023-01-23 23:23:48.526854: step: 388/77, loss: 2.604655037430348e-06 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Chinese: {'template': {'p': 0.9333333333333333, 'r': 0.5511811023622047, 'f1': 0.693069306930693}, 'slot': {'p': 0.5952380952380952, 'r': 0.02162629757785467, 'f1': 0.041736227045075125}, 'combined': 0.028926097952032263, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Korean: {'template': {'p': 0.9333333333333333, 'r': 0.5511811023622047, 'f1': 0.693069306930693}, 'slot': {'p': 0.5813953488372093, 'r': 0.02162629757785467, 'f1': 0.041701417848206836}, 'combined': 0.028901972766083944, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Russian: {'template': {'p': 0.9333333333333333, 'r': 0.5511811023622047, 'f1': 0.693069306930693}, 'slot': {'p': 0.5813953488372093, 'r': 0.02162629757785467, 'f1': 0.041701417848206836}, 'combined': 0.028901972766083944, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 18} ==================================================
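The f1 and combined values in these summaries are internally consistent: each f1 is the harmonic mean of p and r, and 'combined' matches the template f1 times the slot f1. Checking the epoch-18 dev numbers above (the helper is illustrative, not code from train.py):

    def f1(p, r):
        # harmonic mean of precision and recall
        return 2 * p * r / (p + r) if p + r else 0.0

    template_f1 = f1(1.0, 0.5833333333333334)   # 0.7368421052631579
    slot_f1 = f1(0.5, 0.03780718336483932)      # 0.07029876977152899
    print(template_f1 * slot_f1)                # 0.05179909351586346, the reported 'combined'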
Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 19 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:25:28.829560: step: 4/77, loss: 4.772192187374458e-05 2023-01-23 23:25:30.102897: step: 8/77, loss: 0.015655361115932465 2023-01-23 23:25:31.397506: step: 12/77, loss: 0.00045056085218675435 2023-01-23 23:25:32.698182: step: 16/77, loss: 4.759726289194077e-05 2023-01-23 23:25:33.976617: step: 20/77, loss: 0.021239858120679855 2023-01-23 23:25:35.236428: step: 24/77, loss: 0.02561485767364502 2023-01-23 23:25:36.500447: step: 28/77, loss: 0.000703348487149924 2023-01-23 23:25:37.784594: step: 32/77, loss: 0.0007722416776232421 2023-01-23 23:25:39.077197: step: 36/77, loss: 2.419845804979559e-05 2023-01-23 23:25:40.336182: step: 40/77, loss: 0.026117466390132904 2023-01-23 23:25:41.656604: step: 44/77, loss: 4.887551767751575e-07 2023-01-23 23:25:42.973559: step: 48/77, loss: 0.019306404516100883 2023-01-23 23:25:44.210840: step: 52/77, loss: 0.0004252229700796306 2023-01-23 23:25:45.464932: step: 56/77, loss: 5.336081812856719e-05 2023-01-23 23:25:46.721608: step: 60/77, loss: 0.00016709948249626905 2023-01-23 23:25:47.977070: step: 64/77, loss: 7.643592653039377e-06 2023-01-23 23:25:49.264921: step: 68/77, loss: 0.011480233632028103 2023-01-23 23:25:50.565837: step: 72/77, loss: 0.019186750054359436 2023-01-23 23:25:51.841381: step: 76/77, loss: 0.005349922925233841 2023-01-23 23:25:53.145088: step: 80/77, loss: 0.11054429411888123 2023-01-23 23:25:54.433946: step: 84/77, loss: 0.0074567473493516445 2023-01-23 23:25:55.691671: step: 88/77, loss: 0.00015486495976801962 2023-01-23 23:25:56.964674: step: 92/77, loss: 0.00017922437109518796 2023-01-23 23:25:58.261862: step: 96/77, loss: 0.000948175962548703 2023-01-23 23:25:59.537382: step: 100/77, loss:
0.014781606383621693 2023-01-23 23:26:00.825114: step: 104/77, loss: 0.0035185841843485832 2023-01-23 23:26:02.128736: step: 108/77, loss: 0.016932154074311256 2023-01-23 23:26:03.450290: step: 112/77, loss: 0.0009530320530757308 2023-01-23 23:26:04.769938: step: 116/77, loss: 0.056651294231414795 2023-01-23 23:26:06.070146: step: 120/77, loss: 0.0004563061229418963 2023-01-23 23:26:07.340725: step: 124/77, loss: 2.2642556359642185e-05 2023-01-23 23:26:08.646874: step: 128/77, loss: 0.0009033044916577637 2023-01-23 23:26:09.883928: step: 132/77, loss: 0.0007692720391787589 2023-01-23 23:26:11.169139: step: 136/77, loss: 6.139226798040909e-07 2023-01-23 23:26:12.408925: step: 140/77, loss: 0.01133093424141407 2023-01-23 23:26:13.697756: step: 144/77, loss: 0.0002770505379885435 2023-01-23 23:26:14.992271: step: 148/77, loss: 0.021317068487405777 2023-01-23 23:26:16.250299: step: 152/77, loss: 0.00023177666298579425 2023-01-23 23:26:17.453271: step: 156/77, loss: 0.004699833691120148 2023-01-23 23:26:18.689340: step: 160/77, loss: 5.041718759457581e-05 2023-01-23 23:26:19.952579: step: 164/77, loss: 0.11572644114494324 2023-01-23 23:26:21.253865: step: 168/77, loss: 0.00011996572720818222 2023-01-23 23:26:22.550943: step: 172/77, loss: 0.0028565579559653997 2023-01-23 23:26:23.808872: step: 176/77, loss: 1.9968625565525144e-05 2023-01-23 23:26:25.097814: step: 180/77, loss: 0.0027047202456742525 2023-01-23 23:26:26.431670: step: 184/77, loss: 0.00011742675269488245 2023-01-23 23:26:27.720373: step: 188/77, loss: 2.1647458197548985e-05 2023-01-23 23:26:29.027723: step: 192/77, loss: 0.011670035310089588 2023-01-23 23:26:30.328311: step: 196/77, loss: 4.1424587493565923e-07 2023-01-23 23:26:31.634770: step: 200/77, loss: 0.00022093798907008022 2023-01-23 23:26:32.977190: step: 204/77, loss: 0.004982348531484604 2023-01-23 23:26:34.256913: step: 208/77, loss: 0.0030651569832116365 2023-01-23 23:26:35.571517: step: 212/77, loss: 0.012024176307022572 2023-01-23 23:26:36.911606: step: 216/77, loss: 0.009974350221455097 2023-01-23 23:26:38.221315: step: 220/77, loss: 5.705433795810677e-05 2023-01-23 23:26:39.534839: step: 224/77, loss: 0.010892936028540134 2023-01-23 23:26:40.902084: step: 228/77, loss: 6.651450348726939e-06 2023-01-23 23:26:42.207973: step: 232/77, loss: 0.002441111486405134 2023-01-23 23:26:43.499332: step: 236/77, loss: 2.452632998029003e-06 2023-01-23 23:26:44.786656: step: 240/77, loss: 0.00028365125763230026 2023-01-23 23:26:46.074816: step: 244/77, loss: 0.000377084594219923 2023-01-23 23:26:47.360288: step: 248/77, loss: 1.0982014373439597e-06 2023-01-23 23:26:48.688929: step: 252/77, loss: 0.006289721466600895 2023-01-23 23:26:50.005517: step: 256/77, loss: 0.000724776997230947 2023-01-23 23:26:51.356136: step: 260/77, loss: 0.0017441506497561932 2023-01-23 23:26:52.631117: step: 264/77, loss: 0.0012942147441208363 2023-01-23 23:26:53.959906: step: 268/77, loss: 0.0005732322460971773 2023-01-23 23:26:55.283401: step: 272/77, loss: 0.005865746643394232 2023-01-23 23:26:56.619180: step: 276/77, loss: 0.017637211829423904 2023-01-23 23:26:57.908378: step: 280/77, loss: 0.0005447964067570865 2023-01-23 23:26:59.188821: step: 284/77, loss: 3.735323844011873e-05 2023-01-23 23:27:00.543546: step: 288/77, loss: 0.003682750044390559 2023-01-23 23:27:01.876776: step: 292/77, loss: 0.07408548891544342 2023-01-23 23:27:03.172099: step: 296/77, loss: 1.5228745724016335e-05 2023-01-23 23:27:04.515817: step: 300/77, loss: 0.006958605255931616 2023-01-23 23:27:05.822295: step: 304/77, loss: 0.042658884078264236 2023-01-23 23:27:07.131183: step: 308/77, loss: 0.005706743337213993 2023-01-23 23:27:08.465027: step: 312/77, loss: 0.00036678268224932253 2023-01-23 23:27:09.797805: step: 316/77, loss: 6.538533489219844e-05 2023-01-23 23:27:11.106341: step: 320/77, loss: 0.0004068778653163463 2023-01-23 23:27:12.401381: step: 324/77, loss: 1.0171790563617833e-05 2023-01-23 23:27:13.720858: step: 328/77, loss: 0.0013522073859348893 2023-01-23 23:27:15.014159: step: 332/77, loss: 0.0041106091812253 2023-01-23 23:27:16.328813: step: 336/77, loss: 0.031761832535266876 2023-01-23 23:27:17.645505: step: 340/77, loss: 0.0006097041186876595 2023-01-23 23:27:18.903109: step: 344/77, loss: 0.0002405718551017344 2023-01-23 23:27:20.225606: step: 348/77, loss: 0.005357260815799236 2023-01-23 23:27:21.572432: step: 352/77, loss: 0.015837207436561584 2023-01-23 23:27:22.915110: step: 356/77, loss: 9.958293230738491e-05 2023-01-23 23:27:24.232218: step: 360/77, loss: 0.00025743391597643495 2023-01-23 23:27:25.515862: step: 364/77, loss: 0.010441180318593979 2023-01-23 23:27:26.855654: step: 368/77, loss: 0.0012988889357075095 2023-01-23 23:27:28.195972: step: 372/77, loss: 0.013321981765329838 2023-01-23 23:27:29.487960: step: 376/77, loss: 0.005950694903731346 2023-01-23 23:27:30.846317: step: 380/77, loss: 0.06717066466808319 2023-01-23 23:27:32.186749: step: 384/77, loss: 0.059004560112953186 2023-01-23 23:27:33.468236: step: 388/77, loss: 0.0031717417296022177 ================================================== Loss: 0.010
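The single Loss figure printed at each epoch boundary is consistent with the mean of that epoch's step losses, rounded to three decimals. A sketch of that reduction, framed as reading a finished log back (the file name is an assumption, and the file is assumed to hold one epoch's lines):

    import re

    losses = []
    with open("epoch.log", encoding="utf-8") as f:
        for m in re.finditer(r"step: \d+/77, loss: ([0-9.eE+-]+)", f.read()):
            losses.append(float(m.group(1)))
    print(f"Loss: {sum(losses) / len(losses):.3f}")  # reproduces e.g. 'Loss: 0.010'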
-------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 19} Test Chinese: {'template': {'p': 0.9466666666666667, 'r': 0.5590551181102362, 'f1': 0.7029702970297029}, 'slot': {'p': 0.625, 'r': 0.01730103806228374, 'f1': 0.03367003367003367}, 'combined': 0.023669033570023666, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 19} Test Korean: {'template': {'p': 0.9452054794520548, 'r': 0.5433070866141733, 'f1': 0.69}, 'slot': {'p': 0.6060606060606061, 'r': 0.01730103806228374, 'f1': 0.0336417157275021}, 'combined': 0.02321278385197645, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 19} Test Russian: {'template': {'p': 0.9466666666666667, 'r': 0.5590551181102362, 'f1': 0.7029702970297029}, 'slot': {'p': 0.625, 'r': 0.01730103806228374, 'f1': 0.03367003367003367}, 'combined': 0.023669033570023666, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r':
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 20 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:29:14.363871: step: 4/77, loss: 0.02381194569170475 2023-01-23 23:29:15.663063: step: 8/77, loss: 0.025099212303757668 2023-01-23 23:29:16.936487: step: 12/77, loss: 1.4690409443574026e-05 2023-01-23 23:29:18.222389: step: 16/77, loss: 0.02800574153661728 2023-01-23 23:29:19.527525: step: 20/77, loss: 6.664691227342701e-06 2023-01-23 23:29:20.797654: step: 24/77, loss: 2.817774839058984e-05 2023-01-23 23:29:22.068362: step: 28/77, loss: 0.0017202789895236492 2023-01-23 23:29:23.358186: step: 32/77, loss: 0.0005560817080549896 2023-01-23 23:29:24.667189: step: 36/77, loss: 0.004274291917681694 2023-01-23 23:29:25.984429: step: 40/77, loss: 0.05250071361660957 2023-01-23 23:29:27.324042: step: 44/77, loss: 0.0007789755472913384 2023-01-23 23:29:28.611344: step: 48/77, loss: 0.009650000371038914 2023-01-23 23:29:29.868657: step: 52/77, loss: 0.0005584878381341696 2023-01-23 23:29:31.159533: step: 56/77, loss: 0.0029105565045028925 2023-01-23 23:29:32.390966: step: 60/77, loss: 0.005994163453578949 2023-01-23 23:29:33.700494: step: 64/77, loss: 0.007980176247656345 2023-01-23 23:29:34.994223: step: 68/77, loss: 1.2488882020988967e-05 2023-01-23 23:29:36.274816: step: 72/77, loss: 0.0029365727677941322 2023-01-23 23:29:37.549810: step: 76/77, loss: 0.000259653344983235 2023-01-23 23:29:38.862946: step: 80/77, loss: 0.06503565609455109 2023-01-23 23:29:40.131702: step: 84/77, loss: 0.009259148500859737 2023-01-23 23:29:41.396638: step: 88/77, loss: 0.000161934774951078 2023-01-23 
23:29:42.677399: step: 92/77, loss: 0.0005904276622459292 2023-01-23 23:29:43.943570: step: 96/77, loss: 0.0002479857357684523 2023-01-23 23:29:45.223643: step: 100/77, loss: 2.837385363818612e-05 2023-01-23 23:29:46.476020: step: 104/77, loss: 0.001441130181774497 2023-01-23 23:29:47.781918: step: 108/77, loss: 0.0002336905017727986 2023-01-23 23:29:49.065366: step: 112/77, loss: 0.02967887371778488 2023-01-23 23:29:50.349278: step: 116/77, loss: 0.0016614971682429314 2023-01-23 23:29:51.638189: step: 120/77, loss: 0.002425632206723094 2023-01-23 23:29:52.922060: step: 124/77, loss: 0.02181481570005417 2023-01-23 23:29:54.198736: step: 128/77, loss: 0.0042419275268912315 2023-01-23 23:29:55.485344: step: 132/77, loss: 0.005345655605196953 2023-01-23 23:29:56.800328: step: 136/77, loss: 0.011341053992509842 2023-01-23 23:29:58.085819: step: 140/77, loss: 0.002245645970106125 2023-01-23 23:29:59.362255: step: 144/77, loss: 0.00017619726713746786 2023-01-23 23:30:00.633903: step: 148/77, loss: 0.014670961536467075 2023-01-23 23:30:01.917567: step: 152/77, loss: 0.00041092970059253275 2023-01-23 23:30:03.187975: step: 156/77, loss: 8.03138391347602e-05 2023-01-23 23:30:04.494603: step: 160/77, loss: 0.003945107106119394 2023-01-23 23:30:05.764290: step: 164/77, loss: 9.81518387561664e-05 2023-01-23 23:30:07.026263: step: 168/77, loss: 0.0017670283559709787 2023-01-23 23:30:08.324416: step: 172/77, loss: 2.3841788276968146e-07 2023-01-23 23:30:09.611565: step: 176/77, loss: 1.1550903764145914e-05 2023-01-23 23:30:10.869711: step: 180/77, loss: 0.0025116358883678913 2023-01-23 23:30:12.181922: step: 184/77, loss: 0.004818919580429792 2023-01-23 23:30:13.463856: step: 188/77, loss: 0.0010180637473240495 2023-01-23 23:30:14.769701: step: 192/77, loss: 0.00011704555799951777 2023-01-23 23:30:16.016545: step: 196/77, loss: 7.31146355974488e-05 2023-01-23 23:30:17.249211: step: 200/77, loss: 0.007681186310946941 2023-01-23 23:30:18.521457: step: 204/77, loss: 0.0030077178962528706 2023-01-23 23:30:19.825640: step: 208/77, loss: 0.021733706817030907 2023-01-23 23:30:21.098527: step: 212/77, loss: 0.005440224893391132 2023-01-23 23:30:22.423188: step: 216/77, loss: 0.007626230828464031 2023-01-23 23:30:23.725302: step: 220/77, loss: 0.0004863716894760728 2023-01-23 23:30:25.026771: step: 224/77, loss: 0.0017873606411740184 2023-01-23 23:30:26.291470: step: 228/77, loss: 7.974127584020607e-06 2023-01-23 23:30:27.581380: step: 232/77, loss: 2.900967956520617e-06 2023-01-23 23:30:28.891101: step: 236/77, loss: 1.7486419892520644e-05 2023-01-23 23:30:30.145104: step: 240/77, loss: 0.041171155869960785 2023-01-23 23:30:31.443083: step: 244/77, loss: 0.0022427060175687075 2023-01-23 23:30:32.728926: step: 248/77, loss: 0.018227651715278625 2023-01-23 23:30:33.977430: step: 252/77, loss: 0.005274713505059481 2023-01-23 23:30:35.244439: step: 256/77, loss: 0.00016559204959776253 2023-01-23 23:30:36.491506: step: 260/77, loss: 0.023844944313168526 2023-01-23 23:30:37.816146: step: 264/77, loss: 0.10402211546897888 2023-01-23 23:30:39.135656: step: 268/77, loss: 1.6917410903261043e-05 2023-01-23 23:30:40.426982: step: 272/77, loss: 6.095885328250006e-05 2023-01-23 23:30:41.706806: step: 276/77, loss: 0.00011749435361707583 2023-01-23 23:30:42.980272: step: 280/77, loss: 1.0508897503314074e-05 2023-01-23 23:30:44.292839: step: 284/77, loss: 0.00043762908899225295 2023-01-23 23:30:45.540426: step: 288/77, loss: 0.00020534142095129937 2023-01-23 23:30:46.880196: step: 292/77, loss: 0.0012233004672452807 2023-01-23 
23:30:48.171036: step: 296/77, loss: 4.289309435989708e-05 2023-01-23 23:30:49.490569: step: 300/77, loss: 2.827224670909345e-05 2023-01-23 23:30:50.805838: step: 304/77, loss: 0.0011272351257503033 2023-01-23 23:30:52.038363: step: 308/77, loss: 7.997609827725682e-06 2023-01-23 23:30:53.290449: step: 312/77, loss: 0.007476623170077801 2023-01-23 23:30:54.573433: step: 316/77, loss: 0.00041830280679278076 2023-01-23 23:30:55.886378: step: 320/77, loss: 3.9598311559529975e-05 2023-01-23 23:30:57.177209: step: 324/77, loss: 3.4866736768890405e-06 2023-01-23 23:30:58.449790: step: 328/77, loss: 0.011931387707591057 2023-01-23 23:30:59.718181: step: 332/77, loss: 8.82970925886184e-06 2023-01-23 23:31:01.011943: step: 336/77, loss: 0.021589653566479683 2023-01-23 23:31:02.279303: step: 340/77, loss: 0.00010848701640497893 2023-01-23 23:31:03.556061: step: 344/77, loss: 0.00011941129196202382 2023-01-23 23:31:04.831125: step: 348/77, loss: 0.00019851088291034102 2023-01-23 23:31:06.103952: step: 352/77, loss: 0.0003793227078858763 2023-01-23 23:31:07.441879: step: 356/77, loss: 2.4331914119102294e-06 2023-01-23 23:31:08.735317: step: 360/77, loss: 1.367283402942121e-05 2023-01-23 23:31:10.042964: step: 364/77, loss: 0.00044953133328817785 2023-01-23 23:31:11.301312: step: 368/77, loss: 0.003973706159740686 2023-01-23 23:31:12.570132: step: 372/77, loss: 0.00062417215667665 2023-01-23 23:31:13.837888: step: 376/77, loss: 0.014592466875910759 2023-01-23 23:31:15.102482: step: 380/77, loss: 1.2157357559772208e-05 2023-01-23 23:31:16.371744: step: 384/77, loss: 0.012784970924258232 2023-01-23 23:31:17.647821: step: 388/77, loss: 0.00866780523210764 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 20} Test Chinese: {'template': {'p': 0.9210526315789473, 'r': 0.5511811023622047, 'f1': 0.6896551724137933}, 'slot': {'p': 0.5227272727272727, 'r': 0.019896193771626297, 'f1': 0.03833333333333333}, 'combined': 0.026436781609195405, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 20} Test Korean: {'template': {'p': 0.9210526315789473, 'r': 0.5511811023622047, 'f1': 0.6896551724137933}, 'slot': {'p': 0.5227272727272727, 'r': 0.019896193771626297, 'f1': 0.03833333333333333}, 'combined': 0.026436781609195405, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 20} Test Russian: {'template': {'p': 0.922077922077922, 'r': 0.5590551181102362, 'f1': 0.696078431372549}, 'slot': {'p': 0.5227272727272727, 'r': 0.019896193771626297, 'f1': 0.03833333333333333}, 'combined': 0.02668300653594771, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} ==================================================
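"Current best result" has now repeated the epoch-2 numbers for eighteen evaluations: the block is reprinted after every epoch and only replaced when the dev combined score improves. The bookkeeping, as inferred from the log (the names are mine, not train.py's):

    best = {}  # language -> {'dev': ..., 'test': ..., 'epoch': ...}

    def update_best(language, dev_scores, test_scores, epoch):
        # keep the epoch with the highest dev 'combined'; its test scores ride along
        if language not in best or dev_scores['combined'] > best[language]['dev']['combined']:
            best[language] = {'dev': dev_scores, 'test': test_scores, 'epoch': epoch}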
Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 21 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:32:58.229403: step: 4/77, loss: 7.048082579785842e-07 2023-01-23 23:32:59.483039: step: 8/77, loss: 0.03427944704890251 2023-01-23 23:33:00.779569: step: 12/77, loss: 7.991908205440268e-05 2023-01-23 23:33:02.038971: step: 16/77, loss: 3.760176332434639e-05 2023-01-23 23:33:03.322921: step: 20/77, loss: 0.02467663213610649 2023-01-23 23:33:04.638201: step: 24/77, loss: 0.10356248170137405 2023-01-23 23:33:05.865074: step: 28/77, loss: 0.022828515619039536 2023-01-23 23:33:07.135370: step: 32/77, loss: 0.0007306609186343849 2023-01-23 23:33:08.446407: step: 36/77, loss: 0.00021144589118193835 2023-01-23 23:33:09.740136: step: 40/77, loss: 5.7149991334881634e-05 2023-01-23 23:33:11.030115: step: 44/77, loss: 0.00032232870580628514 2023-01-23 23:33:12.314931: step: 48/77, loss: 0.029408568516373634 2023-01-23 23:33:13.646386: step: 52/77, loss: 0.06268583983182907 2023-01-23 23:33:14.945738: step: 56/77, loss: 0.0031255376525223255 2023-01-23 23:33:16.193439: step: 60/77, loss: 0.003057542722672224 2023-01-23 23:33:17.457191: step: 64/77, loss: 0.004814974498003721 2023-01-23 23:33:18.712592: step: 68/77, loss: 2.538320404710248e-05 2023-01-23 23:33:20.009382: step: 72/77, loss: 0.0009136873995885253 2023-01-23 23:33:21.325504: step:
76/77, loss: 0.0011968818726018071 2023-01-23 23:33:22.601612: step: 80/77, loss: 0.0003349235048517585 2023-01-23 23:33:23.917151: step: 84/77, loss: 0.06473742425441742 2023-01-23 23:33:25.182158: step: 88/77, loss: 0.00011328059918014333 2023-01-23 23:33:26.498880: step: 92/77, loss: 0.02154296264052391 2023-01-23 23:33:27.775420: step: 96/77, loss: 0.01184603851288557 2023-01-23 23:33:29.047697: step: 100/77, loss: 0.0021092176903039217 2023-01-23 23:33:30.362779: step: 104/77, loss: 2.473575477779377e-07 2023-01-23 23:33:31.600125: step: 108/77, loss: 4.977339995093644e-05 2023-01-23 23:33:32.883283: step: 112/77, loss: 0.00016734329983592033 2023-01-23 23:33:34.133201: step: 116/77, loss: 1.490108871848861e-07 2023-01-23 23:33:35.426377: step: 120/77, loss: 0.0003039098810404539 2023-01-23 23:33:36.686340: step: 124/77, loss: 2.13262319448404e-05 2023-01-23 23:33:37.977264: step: 128/77, loss: 2.0595960450009443e-05 2023-01-23 23:33:39.275286: step: 132/77, loss: 1.0728827248840389e-07 2023-01-23 23:33:40.551415: step: 136/77, loss: 2.98022992950564e-08 2023-01-23 23:33:41.853852: step: 140/77, loss: 2.5778922463359777e-07 2023-01-23 23:33:43.179846: step: 144/77, loss: 0.0001362333568977192 2023-01-23 23:33:44.426893: step: 148/77, loss: 0.008969240821897984 2023-01-23 23:33:45.716532: step: 152/77, loss: 3.102040636804304e-06 2023-01-23 23:33:47.020322: step: 156/77, loss: 0.02981063537299633 2023-01-23 23:33:48.318644: step: 160/77, loss: 0.00039904689765535295 2023-01-23 23:33:49.614701: step: 164/77, loss: 1.2789114407496527e-05 2023-01-23 23:33:50.919361: step: 168/77, loss: 1.2550574865599629e-05 2023-01-23 23:33:52.201466: step: 172/77, loss: 0.004511318635195494 2023-01-23 23:33:53.487937: step: 176/77, loss: 0.003428037278354168 2023-01-23 23:33:54.812581: step: 180/77, loss: 6.116942586231744e-06 2023-01-23 23:33:56.081067: step: 184/77, loss: 0.005304019898176193 2023-01-23 23:33:57.429523: step: 188/77, loss: 0.0006755455979146063 2023-01-23 23:33:58.689579: step: 192/77, loss: 0.0015934238908812404 2023-01-23 23:33:59.965073: step: 196/77, loss: 1.053499090630794e-06 2023-01-23 23:34:01.258756: step: 200/77, loss: 4.461091521079652e-06 2023-01-23 23:34:02.551734: step: 204/77, loss: 5.4923650168348104e-05 2023-01-23 23:34:03.843792: step: 208/77, loss: 0.00013085010868962854 2023-01-23 23:34:05.122671: step: 212/77, loss: 0.016220975667238235 2023-01-23 23:34:06.431033: step: 216/77, loss: 6.899174422869692e-07 2023-01-23 23:34:07.719682: step: 220/77, loss: 0.014226194471120834 2023-01-23 23:34:09.027320: step: 224/77, loss: 6.874488917674171e-06 2023-01-23 23:34:10.343932: step: 228/77, loss: 4.1871919620461995e-07 2023-01-23 23:34:11.640016: step: 232/77, loss: 6.766220758436248e-05 2023-01-23 23:34:12.918587: step: 236/77, loss: 4.716946932603605e-06 2023-01-23 23:34:14.224017: step: 240/77, loss: 1.7191203369293362e-05 2023-01-23 23:34:15.495440: step: 244/77, loss: 0.0005216790596023202 2023-01-23 23:34:16.768294: step: 248/77, loss: 0.0007348595536313951 2023-01-23 23:34:17.993947: step: 252/77, loss: 6.87366773490794e-05 2023-01-23 23:34:19.316979: step: 256/77, loss: 1.0489097803656477e-05 2023-01-23 23:34:20.574819: step: 260/77, loss: 0.011000293307006359 2023-01-23 23:34:21.892838: step: 264/77, loss: 0.007235467433929443 2023-01-23 23:34:23.152523: step: 268/77, loss: 0.0007028059335425496 2023-01-23 23:34:24.417439: step: 272/77, loss: 4.366143548395485e-05 2023-01-23 23:34:25.682252: step: 276/77, loss: 0.0013454877771437168 2023-01-23 23:34:26.962612: 
step: 280/77, loss: 6.603048677789047e-05 2023-01-23 23:34:28.247394: step: 284/77, loss: 0.02493387833237648 2023-01-23 23:34:29.505036: step: 288/77, loss: 0.0004930190043523908 2023-01-23 23:34:30.783380: step: 292/77, loss: 0.0006468009087257087 2023-01-23 23:34:32.076064: step: 296/77, loss: 0.012403626926243305 2023-01-23 23:34:33.368942: step: 300/77, loss: 0.001801351085305214 2023-01-23 23:34:34.662709: step: 304/77, loss: 2.3860138753661886e-05 2023-01-23 23:34:35.955368: step: 308/77, loss: 0.0017166226170957088 2023-01-23 23:34:37.267304: step: 312/77, loss: 0.0009119111928157508 2023-01-23 23:34:38.608355: step: 316/77, loss: 0.006388116627931595 2023-01-23 23:34:39.895536: step: 320/77, loss: 0.026480983942747116 2023-01-23 23:34:41.149191: step: 324/77, loss: 2.3476293790736236e-05 2023-01-23 23:34:42.403245: step: 328/77, loss: 0.0002862987748812884 2023-01-23 23:34:43.652588: step: 332/77, loss: 2.0965535441064276e-06 2023-01-23 23:34:44.972363: step: 336/77, loss: 1.5079215245350497e-06 2023-01-23 23:34:46.235684: step: 340/77, loss: 7.212046853055654e-07 2023-01-23 23:34:47.504967: step: 344/77, loss: 0.07714388519525528 2023-01-23 23:34:48.806867: step: 348/77, loss: 8.724490908207372e-05 2023-01-23 23:34:50.132599: step: 352/77, loss: 3.0321880331030115e-06 2023-01-23 23:34:51.471387: step: 356/77, loss: 0.00028479041066020727 2023-01-23 23:34:52.770009: step: 360/77, loss: 4.458204421098344e-05 2023-01-23 23:34:54.009743: step: 364/77, loss: 7.021379860816523e-05 2023-01-23 23:34:55.303427: step: 368/77, loss: 0.04699565842747688 2023-01-23 23:34:56.596264: step: 372/77, loss: 0.00013077085895929486 2023-01-23 23:34:57.899697: step: 376/77, loss: 2.8759140491274593e-07 2023-01-23 23:34:59.197359: step: 380/77, loss: 0.00011267088848398998 2023-01-23 23:35:00.526264: step: 384/77, loss: 0.01159019023180008 2023-01-23 23:35:01.785633: step: 388/77, loss: 1.93859477803926e-06 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Chinese: {'template': {'p': 0.9113924050632911, 'r': 0.5669291338582677, 'f1': 0.6990291262135924}, 'slot': {'p': 0.5365853658536586, 'r': 0.01903114186851211, 'f1': 0.036758563074352546}, 'combined': 0.02569530622673188, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Korean: {'template': {'p': 0.9102564102564102, 'r': 0.5590551181102362, 'f1': 0.6926829268292682}, 'slot': {'p': 0.5384615384615384, 'r': 0.018166089965397925, 'f1': 0.035146443514644354}, 'combined': 0.024345341361363404, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Russian: {'template': {'p': 0.9090909090909091, 'r': 0.5511811023622047, 'f1': 0.6862745098039216}, 'slot': {'p': 0.575, 'r': 0.019896193771626297, 'f1': 0.03846153846153846}, 'combined': 0.02639517345399698, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 21} ==================================================
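The test template recalls a few lines up are all multiples of 1/127 (0.5511811023622047 = 70/127, 0.5590551181102362 = 71/127, 0.5669291338582677 = 72/127), which suggests the test split contains 127 gold templates. Denominators like that can be recovered mechanically; a small illustrative check:

    from fractions import Fraction

    for r in (0.5511811023622047, 0.5590551181102362, 0.5669291338582677):
        # recover the simplest fraction near each logged recall
        print(Fraction(r).limit_denominator(200))  # 70/127, 71/127, 72/127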
Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 22 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:36:41.767277: step: 4/77, loss: 0.00034515286097303033 2023-01-23 23:36:43.052511: step: 8/77, loss: 0.00033434914075769484 2023-01-23 23:36:44.352069: step: 12/77, loss: 9.424352901987731e-05 2023-01-23 23:36:45.638600: step: 16/77, loss: 1.2516940728346526e-07 2023-01-23 23:36:46.914627: step: 20/77, loss: 0.0005243119667284191 2023-01-23 23:36:48.118007: step: 24/77, loss: 1.5395658920169808e-05 2023-01-23 23:36:49.377034: step: 28/77, loss: 2.3789565602783114e-05 2023-01-23 23:36:50.649966: step: 32/77, loss: 0.025939157232642174 2023-01-23 23:36:51.944964: step: 36/77, loss: 1.0877823797272868e-07 2023-01-23 23:36:53.238727: step: 40/77, loss: 9.988000783778261e-06 2023-01-23 23:36:54.501277: step: 44/77, loss: 4.319598701840732e-06 2023-01-23 23:36:55.778473: step: 48/77, loss: 6.57673372188583e-05 2023-01-23 23:36:57.016300: step: 52/77, loss: 6.154135121505533e-07 2023-01-23 23:36:58.299158: step: 56/77, loss: 4.635993536794558e-05 2023-01-23 23:36:59.566311: step:
60/77, loss: 0.00046627374831587076 2023-01-23 23:37:00.780676: step: 64/77, loss: 0.03233442083001137 2023-01-23 23:37:02.079787: step: 68/77, loss: 2.6955527573591098e-05 2023-01-23 23:37:03.329469: step: 72/77, loss: 0.00029844618984498084 2023-01-23 23:37:04.572423: step: 76/77, loss: 5.438885750663758e-07 2023-01-23 23:37:05.863484: step: 80/77, loss: 4.504471144173294e-05 2023-01-23 23:37:07.147760: step: 84/77, loss: 5.6063145166262984e-05 2023-01-23 23:37:08.453955: step: 88/77, loss: 0.00044191296910867095 2023-01-23 23:37:09.765572: step: 92/77, loss: 1.559289012220688e-05 2023-01-23 23:37:11.021812: step: 96/77, loss: 3.65998967026826e-05 2023-01-23 23:37:12.277760: step: 100/77, loss: 0.0015643913066014647 2023-01-23 23:37:13.570033: step: 104/77, loss: 0.0005108561599627137 2023-01-23 23:37:14.870924: step: 108/77, loss: 3.702392859850079e-05 2023-01-23 23:37:16.168870: step: 112/77, loss: 1.5779487512190826e-06 2023-01-23 23:37:17.405743: step: 116/77, loss: 8.033843187149614e-06 2023-01-23 23:37:18.700497: step: 120/77, loss: 0.00030644662911072373 2023-01-23 23:37:19.954522: step: 124/77, loss: 0.049470219761133194 2023-01-23 23:37:21.211040: step: 128/77, loss: 4.3808847749460256e-07 2023-01-23 23:37:22.476919: step: 132/77, loss: 1.0594386594675598e-06 2023-01-23 23:37:23.724391: step: 136/77, loss: 6.492507236544043e-05 2023-01-23 23:37:24.986455: step: 140/77, loss: 4.559675232940208e-07 2023-01-23 23:37:26.273251: step: 144/77, loss: 0.0018354627536609769 2023-01-23 23:37:27.546339: step: 148/77, loss: 0.01070198230445385 2023-01-23 23:37:28.822048: step: 152/77, loss: 0.03983582183718681 2023-01-23 23:37:30.119162: step: 156/77, loss: 0.03791589289903641 2023-01-23 23:37:31.418167: step: 160/77, loss: 8.679709026182536e-06 2023-01-23 23:37:32.691478: step: 164/77, loss: 0.004185084719210863 2023-01-23 23:37:33.978573: step: 168/77, loss: 1.7537532812639256e-06 2023-01-23 23:37:35.243259: step: 172/77, loss: 0.00013599536032415926 2023-01-23 23:37:36.548227: step: 176/77, loss: 0.019619328901171684 2023-01-23 23:37:37.873586: step: 180/77, loss: 1.1060351425840054e-05 2023-01-23 23:37:39.175488: step: 184/77, loss: 2.2942162104300223e-05 2023-01-23 23:37:40.451300: step: 188/77, loss: 0.0002040996914729476 2023-01-23 23:37:41.748593: step: 192/77, loss: 0.003156597726047039 2023-01-23 23:37:43.021683: step: 196/77, loss: 1.1533255701579037e-06 2023-01-23 23:37:44.252364: step: 200/77, loss: 0.00010490609565749764 2023-01-23 23:37:45.494371: step: 204/77, loss: 4.532407729129773e-06 2023-01-23 23:37:46.802144: step: 208/77, loss: 0.005407263059169054 2023-01-23 23:37:48.075619: step: 212/77, loss: 3.213984200556297e-06 2023-01-23 23:37:49.372110: step: 216/77, loss: 0.0004152047913521528 2023-01-23 23:37:50.712127: step: 220/77, loss: 0.00022536289179697633 2023-01-23 23:37:51.971587: step: 224/77, loss: 0.053443796932697296 2023-01-23 23:37:53.225921: step: 228/77, loss: 0.08764869719743729 2023-01-23 23:37:54.507588: step: 232/77, loss: 0.007128824945539236 2023-01-23 23:37:55.813125: step: 236/77, loss: 0.02799457125365734 2023-01-23 23:37:57.047851: step: 240/77, loss: 2.533893712097779e-05 2023-01-23 23:37:58.278140: step: 244/77, loss: 2.8621703677345067e-05 2023-01-23 23:37:59.561934: step: 248/77, loss: 3.5507250686350744e-06 2023-01-23 23:38:00.843994: step: 252/77, loss: 0.0003244962135795504 2023-01-23 23:38:02.144768: step: 256/77, loss: 5.079111360828392e-05 2023-01-23 23:38:03.411935: step: 260/77, loss: 7.101958999555791e-06 2023-01-23 23:38:04.672914: 
step: 264/77, loss: 4.6427543566096574e-05 2023-01-23 23:38:05.950114: step: 268/77, loss: 0.0014632672537118196 2023-01-23 23:38:07.276974: step: 272/77, loss: 4.038164718167536e-07 2023-01-23 23:38:08.586301: step: 276/77, loss: 0.006101526785641909 2023-01-23 23:38:09.881763: step: 280/77, loss: 0.0002811462036333978 2023-01-23 23:38:11.172892: step: 284/77, loss: 1.0437474884383846e-05 2023-01-23 23:38:12.417250: step: 288/77, loss: 1.8505952539271675e-06 2023-01-23 23:38:13.702231: step: 292/77, loss: 0.0013959399657323956 2023-01-23 23:38:14.999773: step: 296/77, loss: 0.017746856436133385 2023-01-23 23:38:16.259704: step: 300/77, loss: 0.00010268734331475571 2023-01-23 23:38:17.540564: step: 304/77, loss: 3.69758672604803e-05 2023-01-23 23:38:18.794812: step: 308/77, loss: 0.014934529550373554 2023-01-23 23:38:20.138509: step: 312/77, loss: 4.276600407138176e-07 2023-01-23 23:38:21.413732: step: 316/77, loss: 0.0001311105879722163 2023-01-23 23:38:22.652318: step: 320/77, loss: 5.811410801470629e-07 2023-01-23 23:38:23.943136: step: 324/77, loss: 0.0001222739228978753 2023-01-23 23:38:25.239038: step: 328/77, loss: 0.002665694570168853 2023-01-23 23:38:26.522582: step: 332/77, loss: 1.1662889846775215e-05 2023-01-23 23:38:27.821646: step: 336/77, loss: 4.84055954075302e-06 2023-01-23 23:38:29.119640: step: 340/77, loss: 0.09958529472351074 2023-01-23 23:38:30.413549: step: 344/77, loss: 9.598407814337406e-06 2023-01-23 23:38:31.724769: step: 348/77, loss: 2.340669379918836e-05 2023-01-23 23:38:33.016686: step: 352/77, loss: 4.857738531427458e-07 2023-01-23 23:38:34.288045: step: 356/77, loss: 2.4492983357049525e-05 2023-01-23 23:38:35.585439: step: 360/77, loss: 3.918967763638648e-07 2023-01-23 23:38:36.877682: step: 364/77, loss: 1.9481307390378788e-05 2023-01-23 23:38:38.176584: step: 368/77, loss: 2.6498250008444302e-05 2023-01-23 23:38:39.487116: step: 372/77, loss: 1.524307322142704e-06 2023-01-23 23:38:40.814072: step: 376/77, loss: 0.0005987274344079196 2023-01-23 23:38:42.102375: step: 380/77, loss: 0.0001938179339049384 2023-01-23 23:38:43.431489: step: 384/77, loss: 0.1129758358001709 2023-01-23 23:38:44.715247: step: 388/77, loss: 3.8423590012826025e-05 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Chinese: {'template': {'p': 0.9090909090909091, 'r': 0.5511811023622047, 'f1': 0.6862745098039216}, 'slot': {'p': 0.55, 'r': 0.01903114186851211, 'f1': 0.03678929765886288}, 'combined': 0.02524755721686668, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Korean: {'template': {'p': 0.9090909090909091, 'r': 0.5511811023622047, 'f1': 0.6862745098039216}, 'slot': {'p': 0.5121951219512195, 'r': 0.018166089965397925, 'f1': 0.03508771929824562}, 'combined': 0.02407980736154111, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Russian: {'template': {'p': 0.9090909090909091, 'r': 0.5511811023622047, 'f1': 0.6862745098039216}, 'slot': {'p': 0.5, 'r': 0.01730103806228374, 'f1': 
0.033444816053511704}, 'combined': 0.02295232474260607, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 23 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
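Consecutive step lines are spaced about 1.3 seconds apart (the first two below are 23:40:24.692505 and 23:40:25.962627), so the 97 logged steps of an epoch take roughly two minutes, and the gap before the next epoch header is presumably the evaluation pass. Measured straight off the timestamps (the format string matches the lines shown here):

    from datetime import datetime

    fmt = "%Y-%m-%d %H:%M:%S.%f"
    t0 = datetime.strptime("2023-01-23 23:40:24.692505", fmt)
    t1 = datetime.strptime("2023-01-23 23:40:25.962627", fmt)
    print((t1 - t0).total_seconds())  # 1.270122 seconds between logged steps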
2023-01-23 23:40:24.692505: step: 4/77, loss: 9.337160736322403e-05 2023-01-23 23:40:25.962627: step: 8/77, loss: 8.200939191738144e-06 2023-01-23 23:40:27.283614: step: 12/77, loss: 3.47598006555927e-06 2023-01-23 23:40:28.552968: step: 16/77, loss: 0.0028689149767160416 2023-01-23 23:40:29.824725: step: 20/77, loss: 0.0015908610075712204 2023-01-23 23:40:31.131628: step: 24/77, loss: 0.01427928265184164 2023-01-23 23:40:32.430238: step: 28/77, loss: 1.1142165021738037e-05 2023-01-23 23:40:33.684704: step: 32/77, loss: 0.0005018312949687243 2023-01-23 23:40:34.990137: step: 36/77, loss: 0.02262580208480358 2023-01-23 23:40:36.265628: step: 40/77, loss: 5.2290601161075756e-05 2023-01-23 23:40:37.599754: step: 44/77, loss: 4.255419844412245e-05 2023-01-23 23:40:38.858301: step: 48/77, loss: 7.872828427935019e-05 2023-01-23 23:40:40.130072: step: 52/77, loss: 0.0005638409056700766 2023-01-23 23:40:41.440549: step: 56/77, loss: 0.0003538989694789052 2023-01-23 23:40:42.714259: step: 60/77, loss: 0.0007779510924592614 2023-01-23 23:40:44.016835: step: 64/77, loss: 3.0188837172318017e-06 2023-01-23 23:40:45.324301: step: 68/77, loss: 0.022698700428009033 2023-01-23 23:40:46.668456: step: 72/77, loss: 0.006691284477710724 2023-01-23 23:40:47.955256: step: 76/77, loss: 0.0014096861705183983 2023-01-23 23:40:49.246631: step: 80/77, loss: 0.13911856710910797 2023-01-23 23:40:50.531841: step: 84/77, loss: 1.3243750800029375e-05 2023-01-23 23:40:51.835350: step: 88/77, loss: 0.0006453330861404538 2023-01-23 23:40:53.110154: step: 92/77, loss: 2.8783524612663314e-05 2023-01-23 23:40:54.380725: step: 96/77, loss: 0.04625452682375908 2023-01-23 23:40:55.682136: step: 100/77, loss: 4.312382225180045e-05 2023-01-23 23:40:56.981597: step: 104/77, loss: 0.000414960813941434 2023-01-23 23:40:58.272480: step: 108/77, loss: 3.2782541126152864e-08 2023-01-23 23:40:59.528707: step: 112/77, loss: 1.6369896911783144e-05 2023-01-23 23:41:00.835651: step: 116/77, loss: 1.0430801467009587e-07 2023-01-23 23:41:02.128341: step: 120/77, loss: 0.0005855010822415352 2023-01-23 23:41:03.393076: step: 124/77, loss: 0.00018316449131816626 2023-01-23 23:41:04.672486: step: 128/77, loss: 0.17900878190994263 2023-01-23 23:41:05.939260: step: 132/77, loss: 7.727268894086592e-06 2023-01-23 23:41:07.247720: step: 136/77, loss: 0.0010186383733525872 2023-01-23 23:41:08.493417: step: 140/77, loss: 9.662757656769827e-05 2023-01-23 23:41:09.744969: step: 144/77, loss: 2.028022436206811e-06 2023-01-23 23:41:11.022547: step: 148/77, loss: 1.0520020623516757e-06 2023-01-23 23:41:12.286323: step: 152/77, loss: 0.0005013812333345413 2023-01-23 23:41:13.561013: step: 156/77, loss: 0.00014438100333791226 2023-01-23 23:41:14.878965: step: 160/77, loss: 8.661092579131946e-06 2023-01-23 23:41:16.204921: step: 164/77, loss: 0.007426140364259481 2023-01-23 23:41:17.472842: step: 168/77, loss: 0.00012616364983841777 2023-01-23 23:41:18.785885: step: 172/77, loss: 0.030256465077400208 2023-01-23 23:41:20.130680: step: 176/77, loss: 3.499081140034832e-05 2023-01-23 23:41:21.431840: step: 180/77, loss: 0.0002845051931217313 2023-01-23 23:41:22.735008: step: 184/77, loss: 0.0301145538687706 2023-01-23 23:41:24.003162: step: 188/77, loss: 0.02152554877102375 2023-01-23 23:41:25.286768: step: 192/77, loss: 0.00019312337099108845 2023-01-23 23:41:26.605670: step: 196/77, loss: 0.0016593344043940306 2023-01-23 23:41:27.872643: step: 200/77, loss: 0.020302653312683105 2023-01-23 23:41:29.166116: step: 204/77, loss: 9.856343967840075e-05 2023-01-23 23:41:30.474065: step: 208/77, loss: 1.2278114809305407e-06 2023-01-23 23:41:31.733668: step: 212/77, loss: 0.05086364597082138 2023-01-23 23:41:33.007261: step: 216/77, loss: 1.1684308447001968e-05 2023-01-23 23:41:34.280387: step: 220/77, loss: 0.019000614061951637 2023-01-23 23:41:35.584083: step: 224/77, loss: 0.001930738682858646 2023-01-23 23:41:36.875479: step: 228/77, loss: 0.0012907341588288546 2023-01-23 23:41:38.217302: step: 232/77, loss: 1.937150528874554e-08 2023-01-23 23:41:39.488267: step: 236/77, loss: 4.7083499339350965e-06 2023-01-23 23:41:40.792573: step: 240/77, loss: 0.0022569689899683 2023-01-23 23:41:42.068986: step: 244/77, loss: 5.0357443797111046e-06 2023-01-23 23:41:43.361022: step: 248/77,
loss: 1.2719761798507534e-05 2023-01-23 23:41:44.631933: step: 252/77, loss: 0.002375055104494095 2023-01-23 23:41:45.946907: step: 256/77, loss: 6.614408448513132e-06 2023-01-23 23:41:47.234379: step: 260/77, loss: 0.015899265184998512 2023-01-23 23:41:48.503307: step: 264/77, loss: 0.0006511949468404055 2023-01-23 23:41:49.798241: step: 268/77, loss: 9.760146895132493e-07 2023-01-23 23:41:51.072721: step: 272/77, loss: 0.004656706005334854 2023-01-23 23:41:52.403865: step: 276/77, loss: 1.0758431017166004e-06 2023-01-23 23:41:53.698498: step: 280/77, loss: 4.061367144458927e-05 2023-01-23 23:41:54.947675: step: 284/77, loss: 4.6537832531612366e-05 2023-01-23 23:41:56.249068: step: 288/77, loss: 0.0011781684588640928 2023-01-23 23:41:57.533687: step: 292/77, loss: 8.895739301806316e-05 2023-01-23 23:41:58.863296: step: 296/77, loss: 0.0001069565478246659 2023-01-23 23:42:00.134274: step: 300/77, loss: 0.006709199398756027 2023-01-23 23:42:01.389820: step: 304/77, loss: 0.044382717460393906 2023-01-23 23:42:02.626945: step: 308/77, loss: 0.0005866599385626614 2023-01-23 23:42:03.920799: step: 312/77, loss: 0.00019547372357919812 2023-01-23 23:42:05.193530: step: 316/77, loss: 1.966942875242239e-07 2023-01-23 23:42:06.490289: step: 320/77, loss: 0.016215059906244278 2023-01-23 23:42:07.767434: step: 324/77, loss: 1.3859395039617084e-05 2023-01-23 23:42:09.088967: step: 328/77, loss: 0.005409374833106995 2023-01-23 23:42:10.384868: step: 332/77, loss: 0.02341928333044052 2023-01-23 23:42:11.672451: step: 336/77, loss: 1.094659910449991e-05 2023-01-23 23:42:12.986434: step: 340/77, loss: 5.960464122267695e-09 2023-01-23 23:42:14.300481: step: 344/77, loss: 3.83093856726191e-06 2023-01-23 23:42:15.640253: step: 348/77, loss: 0.008367887698113918 2023-01-23 23:42:16.888021: step: 352/77, loss: 5.503306510945549e-06 2023-01-23 23:42:18.197214: step: 356/77, loss: 1.6103691450553015e-05 2023-01-23 23:42:19.520707: step: 360/77, loss: 0.07554967701435089 2023-01-23 23:42:20.795077: step: 364/77, loss: 4.172321865780759e-08 2023-01-23 23:42:22.065204: step: 368/77, loss: 0.00906360149383545 2023-01-23 23:42:23.344781: step: 372/77, loss: 1.6774265532149002e-05 2023-01-23 23:42:24.612528: step: 376/77, loss: 0.03743258863687515 2023-01-23 23:42:25.929160: step: 380/77, loss: 5.318142211763188e-05 2023-01-23 23:42:27.186927: step: 384/77, loss: 0.01306243147701025 2023-01-23 23:42:28.475029: step: 388/77, loss: 9.19883168535307e-05 ================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Chinese: {'template': {'p': 0.9367088607594937, 'r': 0.5826771653543307, 'f1': 0.7184466019417475}, 'slot': {'p': 0.5897435897435898, 'r': 0.019896193771626297, 'f1': 0.03849372384937238}, 'combined': 0.02765568509566559, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Korean: {'template': {'p': 0.9367088607594937, 'r': 0.5826771653543307, 'f1': 0.7184466019417475}, 'slot': {'p': 0.5897435897435898, 'r': 0.019896193771626297, 'f1': 0.03849372384937238}, 'combined': 0.02765568509566559, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 
'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Russian: {'template': {'p': 0.9367088607594937, 'r': 0.5826771653543307, 'f1': 0.7184466019417475}, 'slot': {'p': 0.5897435897435898, 'r': 0.019896193771626297, 'f1': 0.03849372384937238}, 'combined': 0.02765568509566559, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 24 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:44:08.702601: step: 4/77, loss: 0.00380399776622653 2023-01-23 23:44:10.039984: step: 8/77, loss: 3.5082361137028784e-05 2023-01-23 23:44:11.315439: step: 12/77, loss: 1.9221988623030484e-06 2023-01-23 23:44:12.615943: step: 16/77, loss: 0.015467840246856213 2023-01-23 23:44:13.878819: step: 20/77, loss: 8.227418584283441e-05 2023-01-23 23:44:15.150749: step: 24/77, loss: 4.65135326521704e-06 2023-01-23 23:44:16.373087: step: 28/77, 
loss: 0.0009193022851832211 2023-01-23 23:44:17.652740: step: 32/77, loss: 6.777806447644252e-06 2023-01-23 23:44:18.921254: step: 36/77, loss: 0.0033016628585755825 2023-01-23 23:44:20.252736: step: 40/77, loss: 0.0022671720944344997 2023-01-23 23:44:21.535644: step: 44/77, loss: 1.921259354276117e-05 2023-01-23 23:44:22.814939: step: 48/77, loss: 6.093090632930398e-06 2023-01-23 23:44:24.075768: step: 52/77, loss: 1.2281492672627792e-05 2023-01-23 23:44:25.370780: step: 56/77, loss: 0.0006353338831104338 2023-01-23 23:44:26.655382: step: 60/77, loss: 2.990082612086553e-05 2023-01-23 23:44:27.899560: step: 64/77, loss: 0.027835655957460403 2023-01-23 23:44:29.180819: step: 68/77, loss: 1.7180659597215708e-06 2023-01-23 23:44:30.453983: step: 72/77, loss: 0.052142348140478134 2023-01-23 23:44:31.746403: step: 76/77, loss: 2.086162353975851e-08 2023-01-23 23:44:32.989115: step: 80/77, loss: 9.739868255564943e-05 2023-01-23 23:44:34.258449: step: 84/77, loss: 9.849472917267121e-07 2023-01-23 23:44:35.515027: step: 88/77, loss: 2.7464941013022326e-05 2023-01-23 23:44:36.786253: step: 92/77, loss: 0.007449309341609478 2023-01-23 23:44:38.064072: step: 96/77, loss: 7.262600411195308e-05 2023-01-23 23:44:39.344675: step: 100/77, loss: 1.0117654483110528e-06 2023-01-23 23:44:40.604727: step: 104/77, loss: 0.00021382025443017483 2023-01-23 23:44:41.917228: step: 108/77, loss: 1.078596142178867e-05 2023-01-23 23:44:43.210533: step: 112/77, loss: 8.99299448064994e-06 2023-01-23 23:44:44.482287: step: 116/77, loss: 1.023694949253695e-06 2023-01-23 23:44:45.731605: step: 120/77, loss: 2.2249520043260418e-05 2023-01-23 23:44:46.982241: step: 124/77, loss: 0.02711966075003147 2023-01-23 23:44:48.272791: step: 128/77, loss: 9.983756399378763e-08 2023-01-23 23:44:49.548273: step: 132/77, loss: 0.00011487719893921167 2023-01-23 23:44:50.827738: step: 136/77, loss: 0.013438182882964611 2023-01-23 23:44:52.098913: step: 140/77, loss: 0.017233524471521378 2023-01-23 23:44:53.352800: step: 144/77, loss: 3.7252888773764425e-08 2023-01-23 23:44:54.658280: step: 148/77, loss: 0.013860609382390976 2023-01-23 23:44:55.921071: step: 152/77, loss: 1.5586372228426626e-06 2023-01-23 23:44:57.218605: step: 156/77, loss: 1.7074664356186986e-05 2023-01-23 23:44:58.512999: step: 160/77, loss: 9.230232535628602e-05 2023-01-23 23:44:59.785064: step: 164/77, loss: 3.665661836294021e-07 2023-01-23 23:45:01.111762: step: 168/77, loss: 3.93847658415325e-05 2023-01-23 23:45:02.412735: step: 172/77, loss: 5.1080780394840986e-05 2023-01-23 23:45:03.709047: step: 176/77, loss: 0.006950449664145708 2023-01-23 23:45:04.984080: step: 180/77, loss: 1.3340352779778186e-05 2023-01-23 23:45:06.251578: step: 184/77, loss: 1.596692527527921e-05 2023-01-23 23:45:07.525489: step: 188/77, loss: 8.556472312193364e-05 2023-01-23 23:45:08.774596: step: 192/77, loss: 2.077104227282689e-06 2023-01-23 23:45:10.087322: step: 196/77, loss: 1.8318276488571428e-05 2023-01-23 23:45:11.357501: step: 200/77, loss: 0.015348915942013264 2023-01-23 23:45:12.665018: step: 204/77, loss: 5.960463678178485e-09 2023-01-23 23:45:13.928359: step: 208/77, loss: 0.001952013815753162 2023-01-23 23:45:15.204948: step: 212/77, loss: 0.021748293191194534 2023-01-23 23:45:16.434104: step: 216/77, loss: 1.0564673402768676e-06 2023-01-23 23:45:17.763334: step: 220/77, loss: 0.00024628365645185113 2023-01-23 23:45:19.021679: step: 224/77, loss: 0.018810153007507324 2023-01-23 23:45:20.326330: step: 228/77, loss: 0.07416260987520218 2023-01-23 23:45:21.579758: step: 232/77, 
loss: 0.001658955472521484 2023-01-23 23:45:22.865178: step: 236/77, loss: 1.947512600963819e-06 2023-01-23 23:45:24.189934: step: 240/77, loss: 0.0003093902487307787 2023-01-23 23:45:25.459047: step: 244/77, loss: 2.1571084289462306e-05 2023-01-23 23:45:26.721879: step: 248/77, loss: 0.03571411967277527 2023-01-23 23:45:28.020008: step: 252/77, loss: 1.9538061678758822e-05 2023-01-23 23:45:29.295448: step: 256/77, loss: 9.11098686628975e-06 2023-01-23 23:45:30.592467: step: 260/77, loss: 0.0015212477883324027 2023-01-23 23:45:31.888234: step: 264/77, loss: 2.640430466271937e-05 2023-01-23 23:45:33.162535: step: 268/77, loss: 3.8302790926536545e-05 2023-01-23 23:45:34.471121: step: 272/77, loss: 0.04266560077667236 2023-01-23 23:45:35.775726: step: 276/77, loss: 0.008817881345748901 2023-01-23 23:45:37.041157: step: 280/77, loss: 0.005077583249658346 2023-01-23 23:45:38.324716: step: 284/77, loss: 0.006172202993184328 2023-01-23 23:45:39.641525: step: 288/77, loss: 1.3753941857430618e-05 2023-01-23 23:45:40.963727: step: 292/77, loss: 2.6462712412467226e-06 2023-01-23 23:45:42.254277: step: 296/77, loss: 2.9802320611338473e-09 2023-01-23 23:45:43.512511: step: 300/77, loss: 0.01422153040766716 2023-01-23 23:45:44.814231: step: 304/77, loss: 0.07386370003223419 2023-01-23 23:45:46.101361: step: 308/77, loss: 1.993622390727978e-06 2023-01-23 23:45:47.358440: step: 312/77, loss: 0.0001149525196524337 2023-01-23 23:45:48.687287: step: 316/77, loss: 5.941071140114218e-05 2023-01-23 23:45:49.959491: step: 320/77, loss: 1.8775371302126587e-07 2023-01-23 23:45:51.227207: step: 324/77, loss: 0.0025986775290220976 2023-01-23 23:45:52.515720: step: 328/77, loss: 0.03418916463851929 2023-01-23 23:45:53.799860: step: 332/77, loss: 2.363630301260855e-05 2023-01-23 23:45:55.111619: step: 336/77, loss: 0.03347557410597801 2023-01-23 23:45:56.382768: step: 340/77, loss: 0.01019981037825346 2023-01-23 23:45:57.737477: step: 344/77, loss: 0.01401336770504713 2023-01-23 23:45:59.026712: step: 348/77, loss: 0.00014336804451886564 2023-01-23 23:46:00.353464: step: 352/77, loss: 0.008555217646062374 2023-01-23 23:46:01.614827: step: 356/77, loss: 0.008543770760297775 2023-01-23 23:46:02.898088: step: 360/77, loss: 0.007736127823591232 2023-01-23 23:46:04.167699: step: 364/77, loss: 0.005396129097789526 2023-01-23 23:46:05.412367: step: 368/77, loss: 0.0014105373993515968 2023-01-23 23:46:06.732022: step: 372/77, loss: 0.00011384568642824888 2023-01-23 23:46:08.061721: step: 376/77, loss: 0.00010466657113283873 2023-01-23 23:46:09.331575: step: 380/77, loss: 2.801337132041226e-06 2023-01-23 23:46:10.606315: step: 384/77, loss: 0.0005902171251364052 2023-01-23 23:46:11.863366: step: 388/77, loss: 0.00267789582721889 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 24} Test Chinese: {'template': {'p': 0.9066666666666666, 'r': 0.5354330708661418, 'f1': 0.6732673267326733}, 'slot': {'p': 0.5405405405405406, 'r': 0.01730103806228374, 'f1': 0.03352891869237217}, 'combined': 0.022573925456250574, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 24} Test Korean: {'template': {'p': 
0.9078947368421053, 'r': 0.5433070866141733, 'f1': 0.6798029556650248}, 'slot': {'p': 0.5263157894736842, 'r': 0.01730103806228374, 'f1': 0.03350083752093802}, 'combined': 0.02277396836398743, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 24} Test Russian: {'template': {'p': 0.9066666666666666, 'r': 0.5354330708661418, 'f1': 0.6732673267326733}, 'slot': {'p': 0.5277777777777778, 'r': 0.01643598615916955, 'f1': 0.031879194630872486}, 'combined': 0.02146322014751811, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 24} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 25 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:47:52.088552: step: 4/77, loss: 7.5849900895264e-05 2023-01-23 23:47:53.345730: step: 8/77, loss: 0.0002651477698236704 2023-01-23 23:47:54.577478: step: 12/77, loss: 
2.5703475330374204e-06 2023-01-23 23:47:55.854572: step: 16/77, loss: 0.0011636014096438885 2023-01-23 23:47:57.175975: step: 20/77, loss: 0.0013986665289849043 2023-01-23 23:47:58.486287: step: 24/77, loss: 2.3691984551987844e-06 2023-01-23 23:47:59.717764: step: 28/77, loss: 4.116630952921696e-05 2023-01-23 23:48:01.061162: step: 32/77, loss: 0.037189774215221405 2023-01-23 23:48:02.374937: step: 36/77, loss: 0.03908061981201172 2023-01-23 23:48:03.648074: step: 40/77, loss: 0.022363733500242233 2023-01-23 23:48:04.936765: step: 44/77, loss: 0.00549747608602047 2023-01-23 23:48:06.207738: step: 48/77, loss: 0.0003269324661232531 2023-01-23 23:48:07.500555: step: 52/77, loss: 0.0001640704576857388 2023-01-23 23:48:08.747866: step: 56/77, loss: 0.00024002409190870821 2023-01-23 23:48:09.989523: step: 60/77, loss: 0.0023460511583834887 2023-01-23 23:48:11.289190: step: 64/77, loss: 0.012938913889229298 2023-01-23 23:48:12.552936: step: 68/77, loss: 0.009058503434062004 2023-01-23 23:48:13.835601: step: 72/77, loss: 0.4155775308609009 2023-01-23 23:48:15.117378: step: 76/77, loss: 0.045352742075920105 2023-01-23 23:48:16.416408: step: 80/77, loss: 0.0008751029963605106 2023-01-23 23:48:17.678762: step: 84/77, loss: 1.10414327991748e-06 2023-01-23 23:48:18.956018: step: 88/77, loss: 0.00023450664593838155 2023-01-23 23:48:20.290146: step: 92/77, loss: 0.05811518058180809 2023-01-23 23:48:21.554629: step: 96/77, loss: 0.0003901486925315112 2023-01-23 23:48:22.827302: step: 100/77, loss: 0.00014174467651173472 2023-01-23 23:48:24.117561: step: 104/77, loss: 1.921330658660736e-05 2023-01-23 23:48:25.364194: step: 108/77, loss: 2.3113034330890514e-05 2023-01-23 23:48:26.655751: step: 112/77, loss: 1.6413274352089502e-05 2023-01-23 23:48:27.965094: step: 116/77, loss: 5.45346483704634e-06 2023-01-23 23:48:29.246440: step: 120/77, loss: 5.947385943727568e-06 2023-01-23 23:48:30.538901: step: 124/77, loss: 4.385063220979646e-06 2023-01-23 23:48:31.880515: step: 128/77, loss: 0.0008605056791566312 2023-01-23 23:48:33.185269: step: 132/77, loss: 0.0001738389692036435 2023-01-23 23:48:34.445349: step: 136/77, loss: 1.816932126530446e-05 2023-01-23 23:48:35.746687: step: 140/77, loss: 0.002159570576623082 2023-01-23 23:48:37.041969: step: 144/77, loss: 9.741610119817778e-05 2023-01-23 23:48:38.318439: step: 148/77, loss: 0.024993371218442917 2023-01-23 23:48:39.638652: step: 152/77, loss: 1.113360485760495e-05 2023-01-23 23:48:40.887153: step: 156/77, loss: 2.1446699975058436e-05 2023-01-23 23:48:42.224042: step: 160/77, loss: 0.00607384042814374 2023-01-23 23:48:43.514888: step: 164/77, loss: 0.00035711575765162706 2023-01-23 23:48:44.799962: step: 168/77, loss: 2.063775173155591e-05 2023-01-23 23:48:46.093210: step: 172/77, loss: 0.0007445422234013677 2023-01-23 23:48:47.390270: step: 176/77, loss: 0.021952372044324875 2023-01-23 23:48:48.707881: step: 180/77, loss: 4.619354498913708e-08 2023-01-23 23:48:50.016628: step: 184/77, loss: 8.031611287151463e-07 2023-01-23 23:48:51.282360: step: 188/77, loss: 1.2899691682832781e-05 2023-01-23 23:48:52.548540: step: 192/77, loss: 0.0002587594208307564 2023-01-23 23:48:53.835189: step: 196/77, loss: 1.0057990493805846e-06 2023-01-23 23:48:55.110306: step: 200/77, loss: 0.00792799610644579 2023-01-23 23:48:56.393880: step: 204/77, loss: 0.00010202324483543634 2023-01-23 23:48:57.639980: step: 208/77, loss: 0.0004384967323858291 2023-01-23 23:48:58.895328: step: 212/77, loss: 0.0008030325989238918 2023-01-23 23:49:00.146379: step: 216/77, loss: 
0.2682453393936157 2023-01-23 23:49:01.467430: step: 220/77, loss: 0.005948225501924753 2023-01-23 23:49:02.782346: step: 224/77, loss: 3.5166578982170904e-07 2023-01-23 23:49:04.068633: step: 228/77, loss: 6.484018740593456e-06 2023-01-23 23:49:05.385481: step: 232/77, loss: 0.012864273972809315 2023-01-23 23:49:06.655869: step: 236/77, loss: 1.4885420114296721e-06 2023-01-23 23:49:07.923153: step: 240/77, loss: 0.0001642719144001603 2023-01-23 23:49:09.254240: step: 244/77, loss: 0.012073754332959652 2023-01-23 23:49:10.526452: step: 248/77, loss: 0.009761764667928219 2023-01-23 23:49:11.788417: step: 252/77, loss: 6.24136646365514e-06 2023-01-23 23:49:13.025289: step: 256/77, loss: 0.00013278864207677543 2023-01-23 23:49:14.338230: step: 260/77, loss: 0.0011594881070777774 2023-01-23 23:49:15.603200: step: 264/77, loss: 3.1124171073315665e-05 2023-01-23 23:49:16.916532: step: 268/77, loss: 0.0003066223580390215 2023-01-23 23:49:18.263635: step: 272/77, loss: 0.014135126024484634 2023-01-23 23:49:19.534805: step: 276/77, loss: 0.01993345282971859 2023-01-23 23:49:20.862244: step: 280/77, loss: 0.00028734607622027397 2023-01-23 23:49:22.168682: step: 284/77, loss: 0.006775941699743271 2023-01-23 23:49:23.451029: step: 288/77, loss: 0.00016594542830716819 2023-01-23 23:49:24.754445: step: 292/77, loss: 1.6688773030182347e-06 2023-01-23 23:49:26.050848: step: 296/77, loss: 2.0462706743273884e-05 2023-01-23 23:49:27.341900: step: 300/77, loss: 1.6956513491095393e-06 2023-01-23 23:49:28.636361: step: 304/77, loss: 0.00023198648705147207 2023-01-23 23:49:29.941890: step: 308/77, loss: 0.002622495638206601 2023-01-23 23:49:31.209100: step: 312/77, loss: 0.05106724798679352 2023-01-23 23:49:32.497509: step: 316/77, loss: 0.001140963053330779 2023-01-23 23:49:33.829328: step: 320/77, loss: 0.005684789270162582 2023-01-23 23:49:35.092376: step: 324/77, loss: 2.4211341951740906e-05 2023-01-23 23:49:36.337255: step: 328/77, loss: 7.808050668245414e-07 2023-01-23 23:49:37.631884: step: 332/77, loss: 0.019995568320155144 2023-01-23 23:49:38.942213: step: 336/77, loss: 3.076811117352918e-05 2023-01-23 23:49:40.235636: step: 340/77, loss: 2.486864832462743e-05 2023-01-23 23:49:41.559654: step: 344/77, loss: 9.378846880281344e-05 2023-01-23 23:49:42.877157: step: 348/77, loss: 0.0006190786371007562 2023-01-23 23:49:44.155670: step: 352/77, loss: 0.0008651064126752317 2023-01-23 23:49:45.479482: step: 356/77, loss: 0.00035393013968132436 2023-01-23 23:49:46.742821: step: 360/77, loss: 0.006651232950389385 2023-01-23 23:49:48.025607: step: 364/77, loss: 2.03934105229564e-05 2023-01-23 23:49:49.266753: step: 368/77, loss: 5.334555339686631e-07 2023-01-23 23:49:50.568484: step: 372/77, loss: 1.5221969078993425e-05 2023-01-23 23:49:51.805379: step: 376/77, loss: 0.0008514021174050868 2023-01-23 23:49:53.127622: step: 380/77, loss: 0.0064077554270625114 2023-01-23 23:49:54.383682: step: 384/77, loss: 6.021185254212469e-05 2023-01-23 23:49:55.703687: step: 388/77, loss: 0.04518267512321472 ================================================== Loss: 0.013 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Chinese: {'template': {'p': 0.9358974358974359, 'r': 0.5748031496062992, 'f1': 0.7121951219512195}, 'slot': {'p': 0.6190476190476191, 'r': 0.02249134948096886, 'f1': 0.04340567612687813}, 'combined': 0.030913310802557107, 'epoch': 25} 
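A note on how these evaluation dicts fit together: every 'template' and 'slot' entry reports precision 'p', recall 'r', and their harmonic-mean 'f1', and in every Dev/Test/Sample line in this log the 'combined' value equals template f1 x slot f1 (e.g. 0.7368421052631579 x 0.07029876977152899 = 0.05179909351586346 for the dev splits). The sketch below shows that arithmetic; it is inferred from the logged numbers, not taken from the training code, which is not shown here:

def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall; 0.0 when both are zero."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined(template_f1: float, slot_f1: float) -> float:
    """'combined' in this log matches the product of the two F1 scores
    (an inferred rule, consistent with every eval line in this excerpt)."""
    return template_f1 * slot_f1

# Reproducing the epoch-25 dev numbers:
template_f1_val = f1(1.0, 0.5833333333333334)  # ~0.7368421052631579, as logged
slot_f1_val = f1(0.5, 0.03780718336483932)     # ~0.07029876977152899, as logged
print(combined(template_f1_val, slot_f1_val))  # ~0.05179909351586346, the logged 'combined'

The product form also explains why 'combined' stays small even when template F1 is above 0.7: slot recall around 0.02-0.04 caps slot F1 near 0.03-0.07, which in turn caps 'combined' near 0.02-0.05 throughout these epochs.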
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Korean: {'template': {'p': 0.9358974358974359, 'r': 0.5748031496062992, 'f1': 0.7121951219512195}, 'slot': {'p': 0.6190476190476191, 'r': 0.02249134948096886, 'f1': 0.04340567612687813}, 'combined': 0.030913310802557107, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Russian: {'template': {'p': 0.9358974358974359, 'r': 0.5748031496062992, 'f1': 0.7121951219512195}, 'slot': {'p': 0.6190476190476191, 'r': 0.02249134948096886, 'f1': 0.04340567612687813}, 'combined': 0.030913310802557107, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 26 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 
--event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:51:36.119793: step: 4/77, loss: 1.244232748831564e-06 2023-01-23 23:51:37.426205: step: 8/77, loss: 0.01106318924576044 2023-01-23 23:51:38.704529: step: 12/77, loss: 1.7734229913912714e-05 2023-01-23 23:51:39.984971: step: 16/77, loss: 0.04055573791265488 2023-01-23 23:51:41.302183: step: 20/77, loss: 0.011438457295298576 2023-01-23 23:51:42.569757: step: 24/77, loss: 0.02927778847515583 2023-01-23 23:51:43.780008: step: 28/77, loss: 0.00024149783712346107 2023-01-23 23:51:45.080953: step: 32/77, loss: 3.0069857075432083e-06 2023-01-23 23:51:46.376811: step: 36/77, loss: 0.0003100955509580672 2023-01-23 23:51:47.675815: step: 40/77, loss: 0.031127629801630974 2023-01-23 23:51:48.974931: step: 44/77, loss: 0.00035606580786406994 2023-01-23 23:51:50.294134: step: 48/77, loss: 0.0004578246735036373 2023-01-23 23:51:51.549044: step: 52/77, loss: 0.005972975865006447 2023-01-23 23:51:52.867948: step: 56/77, loss: 0.0008780988864600658 2023-01-23 23:51:54.121368: step: 60/77, loss: 0.0060501242987811565 2023-01-23 23:51:55.427191: step: 64/77, loss: 1.1174272003700025e-05 2023-01-23 23:51:56.719292: step: 68/77, loss: 0.000842059263959527 2023-01-23 23:51:57.954320: step: 72/77, loss: 5.181662800168851e-06 2023-01-23 23:51:59.207917: step: 76/77, loss: 3.5017495747524663e-07 2023-01-23 23:52:00.502997: step: 80/77, loss: 0.00163843494374305 2023-01-23 23:52:01.775668: step: 84/77, loss: 1.0385938367107883e-06 2023-01-23 23:52:03.050744: step: 88/77, loss: 0.0030117020942270756 2023-01-23 23:52:04.309417: step: 92/77, loss: 7.071460004226537e-06 2023-01-23 23:52:05.630833: step: 96/77, loss: 0.00018569506937637925 2023-01-23 23:52:06.911231: step: 100/77, loss: 5.652254913002253e-06 2023-01-23 23:52:08.186320: step: 104/77, loss: 0.00032322845072485507 2023-01-23 23:52:09.460786: step: 108/77, loss: 0.000852587225381285 2023-01-23 23:52:10.743468: step: 112/77, loss: 2.957681135740131e-05 2023-01-23 23:52:12.011757: step: 116/77, loss: 7.405157703033183e-06 2023-01-23 23:52:13.304338: step: 120/77, loss: 1.4922447917342652e-05 2023-01-23 23:52:14.585681: step: 124/77, loss: 0.02082359604537487 2023-01-23 23:52:15.877499: step: 128/77, loss: 6.705517563432295e-08 2023-01-23 23:52:17.118170: step: 132/77, loss: 0.00026560970582067966 2023-01-23 23:52:18.394519: step: 136/77, loss: 1.1324837601023319e-07 2023-01-23 23:52:19.717042: step: 140/77, loss: 0.0005567181506194174 2023-01-23 23:52:20.914771: step: 144/77, loss: 0.0011684320634230971 2023-01-23 23:52:22.219724: step: 148/77, loss: 0.0008083868306130171 2023-01-23 23:52:23.473974: step: 152/77, loss: 0.00045696506276726723 2023-01-23 23:52:24.791601: step: 156/77, loss: 2.1207903046160936e-05 2023-01-23 23:52:26.055950: step: 160/77, loss: 0.0016548774437978864 2023-01-23 23:52:27.314686: step: 164/77, loss: 3.3676190014375607e-07 2023-01-23 23:52:28.609530: step: 168/77, loss: 1.5362414842456928e-06 2023-01-23 23:52:29.859097: step: 172/77, loss: 3.1292412216998855e-08 2023-01-23 23:52:31.124888: step: 176/77, loss: 0.0032324367202818394 2023-01-23 23:52:32.431857: step: 180/77, loss: 1.1079687283199746e-05 2023-01-23 23:52:33.693069: step: 184/77, loss: 0.0 2023-01-23 23:52:34.932854: step: 188/77, loss: 0.004545817617326975 2023-01-23 23:52:36.187533: step: 192/77, loss: 0.0003192027797922492 2023-01-23 23:52:37.517423: step: 196/77, loss: 0.012223941273987293 2023-01-23 23:52:38.827880: step: 200/77, loss: 
4.3958004880551016e-07 2023-01-23 23:52:40.148280: step: 204/77, loss: 0.0162535160779953 2023-01-23 23:52:41.409010: step: 208/77, loss: 1.4048951925360598e-05 2023-01-23 23:52:42.674338: step: 212/77, loss: 3.874299991935004e-08 2023-01-23 23:52:43.966671: step: 216/77, loss: 5.006731953471899e-07 2023-01-23 23:52:45.278505: step: 220/77, loss: 0.009009288623929024 2023-01-23 23:52:46.549305: step: 224/77, loss: 2.1173013010411523e-05 2023-01-23 23:52:47.871502: step: 228/77, loss: 1.7163351003546268e-05 2023-01-23 23:52:49.139186: step: 232/77, loss: 0.00011633327085291967 2023-01-23 23:52:50.431899: step: 236/77, loss: 8.594244718551636e-05 2023-01-23 23:52:51.713300: step: 240/77, loss: 9.750283788889647e-05 2023-01-23 23:52:52.982289: step: 244/77, loss: 1.0430809105343997e-08 2023-01-23 23:52:54.306406: step: 248/77, loss: 1.50942094023776e-06 2023-01-23 23:52:55.597278: step: 252/77, loss: 3.769965246647189e-07 2023-01-23 23:52:56.898176: step: 256/77, loss: 4.2295978346373886e-05 2023-01-23 23:52:58.141756: step: 260/77, loss: 7.710335921728984e-06 2023-01-23 23:52:59.366753: step: 264/77, loss: 7.4505792646561986e-09 2023-01-23 23:53:00.650029: step: 268/77, loss: 5.066387842589393e-08 2023-01-23 23:53:01.938741: step: 272/77, loss: 2.1589827156276442e-05 2023-01-23 23:53:03.210012: step: 276/77, loss: 0.0001471538416808471 2023-01-23 23:53:04.527725: step: 280/77, loss: 3.8839229091536254e-05 2023-01-23 23:53:05.793246: step: 284/77, loss: 9.147621312877163e-06 2023-01-23 23:53:07.014527: step: 288/77, loss: 1.2099409332222422e-06 2023-01-23 23:53:08.289397: step: 292/77, loss: 0.00029746367363259196 2023-01-23 23:53:09.555061: step: 296/77, loss: 5.811446612824511e-08 2023-01-23 23:53:10.859648: step: 300/77, loss: 4.01707711716881e-06 2023-01-23 23:53:12.127275: step: 304/77, loss: 2.0085465166630456e-06 2023-01-23 23:53:13.430020: step: 308/77, loss: 8.670324859849643e-06 2023-01-23 23:53:14.726041: step: 312/77, loss: 0.016092410311102867 2023-01-23 23:53:15.980195: step: 316/77, loss: 1.2665944382206362e-07 2023-01-23 23:53:17.208204: step: 320/77, loss: 6.273311328186537e-07 2023-01-23 23:53:18.472872: step: 324/77, loss: 0.0018693513702601194 2023-01-23 23:53:19.791548: step: 328/77, loss: 2.8741076221194817e-06 2023-01-23 23:53:21.043023: step: 332/77, loss: 0.018234344199299812 2023-01-23 23:53:22.324746: step: 336/77, loss: 8.404001619055634e-07 2023-01-23 23:53:23.639901: step: 340/77, loss: 7.748588615186236e-08 2023-01-23 23:53:24.929948: step: 344/77, loss: 5.752379365731031e-06 2023-01-23 23:53:26.237755: step: 348/77, loss: 3.501755827528541e-07 2023-01-23 23:53:27.491088: step: 352/77, loss: 0.007864338345825672 2023-01-23 23:53:28.720302: step: 356/77, loss: 0.0035333663690835238 2023-01-23 23:53:29.988152: step: 360/77, loss: 9.834761272031756e-08 2023-01-23 23:53:31.295173: step: 364/77, loss: 1.4901159417490817e-08 2023-01-23 23:53:32.568836: step: 368/77, loss: 0.00010256327368551865 2023-01-23 23:53:33.846887: step: 372/77, loss: 6.437193178499001e-07 2023-01-23 23:53:35.150910: step: 376/77, loss: 4.33922978118062e-05 2023-01-23 23:53:36.455565: step: 380/77, loss: 9.536726963688125e-08 2023-01-23 23:53:37.714948: step: 384/77, loss: 2.984027196362149e-05 2023-01-23 23:53:38.997669: step: 388/77, loss: 1.1920904796625109e-07 ================================================== Loss: 0.003 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 
0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 26} Test Chinese: {'template': {'p': 0.8928571428571429, 'r': 0.5905511811023622, 'f1': 0.7109004739336493}, 'slot': {'p': 0.5652173913043478, 'r': 0.02249134948096886, 'f1': 0.04326123128119801}, 'combined': 0.03075442982075688, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 26} Test Korean: {'template': {'p': 0.9036144578313253, 'r': 0.5905511811023622, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5909090909090909, 'r': 0.02249134948096886, 'f1': 0.043333333333333335}, 'combined': 0.03095238095238095, 'epoch': 26} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 26} Test Russian: {'template': {'p': 0.9036144578313253, 'r': 0.5905511811023622, 'f1': 0.7142857142857142}, 'slot': {'p': 0.6046511627906976, 'r': 0.02249134948096886, 'f1': 0.043369474562135114}, 'combined': 0.03097819611581079, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 26} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 
'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 27 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:55:18.810581: step: 4/77, loss: 0.0009632143774069846 2023-01-23 23:55:20.090557: step: 8/77, loss: 9.709888399811462e-05 2023-01-23 23:55:21.390250: step: 12/77, loss: 7.260393886099337e-06 2023-01-23 23:55:22.613519: step: 16/77, loss: 2.3245681290973152e-07 2023-01-23 23:55:23.889931: step: 20/77, loss: 9.16340468393173e-06 2023-01-23 23:55:25.141254: step: 24/77, loss: 1.0147099601454102e-05 2023-01-23 23:55:26.467727: step: 28/77, loss: 0.005319915246218443 2023-01-23 23:55:27.727052: step: 32/77, loss: 1.7333022697130218e-05 2023-01-23 23:55:28.992401: step: 36/77, loss: 0.07323425263166428 2023-01-23 23:55:30.248653: step: 40/77, loss: 0.01170014776289463 2023-01-23 23:55:31.530343: step: 44/77, loss: 4.917379925473142e-08 2023-01-23 23:55:32.800879: step: 48/77, loss: 0.00034738657996058464 2023-01-23 23:55:34.026892: step: 52/77, loss: 1.9967454534253193e-07 2023-01-23 23:55:35.288385: step: 56/77, loss: 1.3398454029811546e-05 2023-01-23 23:55:36.544554: step: 60/77, loss: 0.012574520893394947 2023-01-23 23:55:37.866503: step: 64/77, loss: 8.59444207890192e-06 2023-01-23 23:55:39.187508: step: 68/77, loss: 8.180574582183908e-07 2023-01-23 23:55:40.476042: step: 72/77, loss: 5.900776045564271e-07 2023-01-23 23:55:41.756556: step: 76/77, loss: 1.0342812856833916e-05 2023-01-23 23:55:43.004191: step: 80/77, loss: 0.004377368837594986 2023-01-23 23:55:44.309317: step: 84/77, loss: 0.00016269163461402059 2023-01-23 23:55:45.616822: step: 88/77, loss: 2.8387847123667598e-05 2023-01-23 23:55:46.903584: step: 92/77, loss: 0.0003090954851359129 2023-01-23 23:55:48.200763: step: 96/77, loss: 4.673906005336903e-06 2023-01-23 23:55:49.495890: step: 100/77, loss: 0.00022745260503143072 2023-01-23 23:55:50.799573: step: 104/77, loss: 3.643586751422845e-05 2023-01-23 23:55:52.060503: step: 108/77, loss: 1.0860110705834813e-05 2023-01-23 23:55:53.381521: step: 112/77, loss: 0.007070006802678108 2023-01-23 23:55:54.638504: step: 116/77, loss: 0.017348483204841614 2023-01-23 23:55:55.905183: step: 120/77, loss: 4.704351886175573e-05 2023-01-23 23:55:57.212072: step: 124/77, loss: 0.018814612179994583 2023-01-23 23:55:58.487452: step: 128/77, loss: 0.0023173014633357525 2023-01-23 23:55:59.769339: step: 132/77, loss: 4.932226147502661e-07 2023-01-23 23:56:01.058839: step: 136/77, loss: 0.00010033223225036636 2023-01-23 23:56:02.366666: step: 140/77, loss: 3.6354311305331066e-05 2023-01-23 23:56:03.658490: step: 144/77, loss: 0.00012139079626649618 2023-01-23 23:56:04.952820: step: 148/77, loss: 2.742990545812063e-05 2023-01-23 23:56:06.270721: step: 152/77, loss: 0.025909584015607834 2023-01-23 23:56:07.598151: step: 156/77, loss: 1.2906899428344332e-05 2023-01-23 23:56:08.913649: step: 160/77, loss: 0.018763171508908272 2023-01-23 23:56:10.225829: step: 164/77, loss: 0.004122719168663025 2023-01-23 23:56:11.496628: step: 168/77, loss: 0.0005570485373027623 2023-01-23 23:56:12.780592: step: 172/77, loss: 0.013160121627151966 2023-01-23 23:56:14.035259: step: 176/77, loss: 0.00028207077411934733 2023-01-23 23:56:15.357073: step: 180/77, loss: 0.013939131051301956 2023-01-23 
23:56:16.656060: step: 184/77, loss: 0.013419135473668575 2023-01-23 23:56:17.957326: step: 188/77, loss: 2.6850261747313198e-06 2023-01-23 23:56:19.246942: step: 192/77, loss: 3.027114325959701e-05 2023-01-23 23:56:20.562326: step: 196/77, loss: 2.2202648608526943e-07 2023-01-23 23:56:21.859098: step: 200/77, loss: 1.520577643532306e-05 2023-01-23 23:56:23.117960: step: 204/77, loss: 5.405565389082767e-06 2023-01-23 23:56:24.428039: step: 208/77, loss: 6.854527612176753e-08 2023-01-23 23:56:25.727015: step: 212/77, loss: 4.2942242544086184e-06 2023-01-23 23:56:26.993321: step: 216/77, loss: 7.763283633721585e-07 2023-01-23 23:56:28.271749: step: 220/77, loss: 4.440516079284862e-07 2023-01-23 23:56:29.533974: step: 224/77, loss: 1.5358633390860632e-05 2023-01-23 23:56:30.809944: step: 228/77, loss: 5.9604580826544407e-08 2023-01-23 23:56:32.095630: step: 232/77, loss: 0.000686664308886975 2023-01-23 23:56:33.371708: step: 236/77, loss: 0.033505942672491074 2023-01-23 23:56:34.679842: step: 240/77, loss: 1.3873767784389202e-05 2023-01-23 23:56:36.008082: step: 244/77, loss: 8.046392849792028e-07 2023-01-23 23:56:37.354695: step: 248/77, loss: 6.70079534756951e-05 2023-01-23 23:56:38.636792: step: 252/77, loss: 0.0006942515028640628 2023-01-23 23:56:39.884143: step: 256/77, loss: 0.0001692011283012107 2023-01-23 23:56:41.188661: step: 260/77, loss: 0.001478560152463615 2023-01-23 23:56:42.468047: step: 264/77, loss: 8.87875648913905e-05 2023-01-23 23:56:43.754693: step: 268/77, loss: 0.009863415732979774 2023-01-23 23:56:45.020263: step: 272/77, loss: 2.6446809897606727e-06 2023-01-23 23:56:46.301707: step: 276/77, loss: 5.823625087941764e-06 2023-01-23 23:56:47.568720: step: 280/77, loss: 0.007472109980881214 2023-01-23 23:56:48.835921: step: 284/77, loss: 0.000493723142426461 2023-01-23 23:56:50.164978: step: 288/77, loss: 3.874297149764061e-08 2023-01-23 23:56:51.475815: step: 292/77, loss: 1.3560013201185939e-07 2023-01-23 23:56:52.737931: step: 296/77, loss: 0.0008401316008530557 2023-01-23 23:56:54.037338: step: 300/77, loss: 0.0001201034028781578 2023-01-23 23:56:55.327653: step: 304/77, loss: 0.0036236299201846123 2023-01-23 23:56:56.650193: step: 308/77, loss: 9.447152820030169e-07 2023-01-23 23:56:57.935382: step: 312/77, loss: 1.7672722606221214e-05 2023-01-23 23:56:59.176590: step: 316/77, loss: 1.4962723071221262e-05 2023-01-23 23:57:00.513818: step: 320/77, loss: 3.774357173824683e-05 2023-01-23 23:57:01.814759: step: 324/77, loss: 6.529239180963486e-05 2023-01-23 23:57:03.104854: step: 328/77, loss: 3.069627894092264e-07 2023-01-23 23:57:04.437478: step: 332/77, loss: 0.01833205111324787 2023-01-23 23:57:05.690009: step: 336/77, loss: 0.0007400041213259101 2023-01-23 23:57:07.000239: step: 340/77, loss: 0.00012709743168670684 2023-01-23 23:57:08.278291: step: 344/77, loss: 2.483945536368992e-06 2023-01-23 23:57:09.570564: step: 348/77, loss: 3.844440925604431e-07 2023-01-23 23:57:10.920238: step: 352/77, loss: 0.0030930594075471163 2023-01-23 23:57:12.170400: step: 356/77, loss: 0.0015991883119568229 2023-01-23 23:57:13.496769: step: 360/77, loss: 1.031789361150004e-05 2023-01-23 23:57:14.798689: step: 364/77, loss: 1.4349502635013778e-06 2023-01-23 23:57:16.073977: step: 368/77, loss: 2.8228296287124977e-05 2023-01-23 23:57:17.362476: step: 372/77, loss: 7.927232559268305e-07 2023-01-23 23:57:18.651792: step: 376/77, loss: 0.0005354603636078537 2023-01-23 23:57:19.917862: step: 380/77, loss: 0.00016630259051453322 2023-01-23 23:57:21.192506: step: 384/77, loss: 
0.017812933772802353 2023-01-23 23:57:22.462443: step: 388/77, loss: 1.934991087182425e-05 ================================================== Loss: 0.004 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27} Test Chinese: {'template': {'p': 0.9358974358974359, 'r': 0.5748031496062992, 'f1': 0.7121951219512195}, 'slot': {'p': 0.5897435897435898, 'r': 0.019896193771626297, 'f1': 0.03849372384937238}, 'combined': 0.027415042351260327, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27} Test Korean: {'template': {'p': 0.9230769230769231, 'r': 0.5669291338582677, 'f1': 0.7024390243902439}, 'slot': {'p': 0.5897435897435898, 'r': 0.019896193771626297, 'f1': 0.03849372384937238}, 'combined': 0.027039493825900596, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27} Test Russian: {'template': {'p': 0.9358974358974359, 'r': 0.5748031496062992, 'f1': 0.7121951219512195}, 'slot': {'p': 0.5897435897435898, 'r': 0.019896193771626297, 'f1': 0.03849372384937238}, 'combined': 0.027415042351260327, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 
'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 28 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:59:02.680509: step: 4/77, loss: 0.0021865300368517637 2023-01-23 23:59:03.996308: step: 8/77, loss: 0.00019810137746389955 2023-01-23 23:59:05.256004: step: 12/77, loss: 0.004027987364679575 2023-01-23 23:59:06.514484: step: 16/77, loss: 3.8547568692592904e-05 2023-01-23 23:59:07.776557: step: 20/77, loss: 0.00019763948512263596 2023-01-23 23:59:09.026319: step: 24/77, loss: 0.015152830630540848 2023-01-23 23:59:10.277518: step: 28/77, loss: 0.00030233588768169284 2023-01-23 23:59:11.576137: step: 32/77, loss: 0.0005768241826444864 2023-01-23 23:59:12.887224: step: 36/77, loss: 1.628660584174213e-06 2023-01-23 23:59:14.141291: step: 40/77, loss: 6.025625407346524e-05 2023-01-23 23:59:15.418994: step: 44/77, loss: 8.51103959575994e-06 2023-01-23 23:59:16.721806: step: 48/77, loss: 1.4007048321218463e-07 2023-01-23 23:59:18.047423: step: 52/77, loss: 2.3750601030769758e-05 2023-01-23 23:59:19.375990: step: 56/77, loss: 0.002472936175763607 2023-01-23 23:59:20.654981: step: 60/77, loss: 0.00030853349016979337 2023-01-23 23:59:21.942660: step: 64/77, loss: 2.0950446923961863e-06 2023-01-23 23:59:23.232297: step: 68/77, loss: 2.771506615317776e-06 2023-01-23 23:59:24.503162: step: 72/77, loss: 4.5486776798497885e-06 2023-01-23 23:59:25.779038: step: 76/77, loss: 4.2551237129373476e-05 2023-01-23 23:59:27.050291: step: 80/77, loss: 0.00015797361265867949 2023-01-23 23:59:28.348492: step: 84/77, loss: 6.729036977048963e-05 2023-01-23 23:59:29.677801: step: 88/77, loss: 0.0012200362980365753 2023-01-23 23:59:30.982215: step: 92/77, loss: 1.1205382861589896e-06 2023-01-23 23:59:32.302877: step: 96/77, loss: 0.0009553980198688805 2023-01-23 23:59:33.591261: step: 100/77, loss: 3.157766695949249e-05 2023-01-23 23:59:34.866525: step: 104/77, loss: 2.947140046671848e-06 2023-01-23 23:59:36.190008: step: 108/77, loss: 0.0001485990360379219 2023-01-23 23:59:37.464704: step: 112/77, loss: 3.3513078960822895e-05 2023-01-23 23:59:38.748966: step: 116/77, loss: 0.001428778632543981 2023-01-23 23:59:40.067246: step: 120/77, loss: 6.617177405132679e-06 2023-01-23 23:59:41.310117: step: 124/77, loss: 3.6655364965554327e-06 2023-01-23 23:59:42.566590: step: 128/77, loss: 4.7577421355526894e-05 2023-01-23 23:59:43.801956: step: 132/77, loss: 0.005376014858484268 2023-01-23 23:59:45.117899: step: 136/77, loss: 0.03443664684891701 2023-01-23 23:59:46.426107: step: 140/77, loss: 0.00010895886225625873 2023-01-23 23:59:47.758417: step: 144/77, loss: 5.1116644499416e-06 2023-01-23 23:59:49.077995: step: 148/77, loss: 0.012373952195048332 2023-01-23 23:59:50.377741: step: 152/77, loss: 0.0011482408735901117 2023-01-23 23:59:51.661094: step: 156/77, loss: 0.0007012872956693172 2023-01-23 23:59:52.946357: step: 160/77, loss: 8.746685011828959e-07 2023-01-23 23:59:54.197874: step: 164/77, 
loss: 9.193700520881976e-07 2023-01-23 23:59:55.487026: step: 168/77, loss: 2.2305810034595197e-06 2023-01-23 23:59:56.752898: step: 172/77, loss: 0.001875612186267972 2023-01-23 23:59:58.085795: step: 176/77, loss: 6.929267783561954e-06 2023-01-23 23:59:59.350776: step: 180/77, loss: 8.188005449483171e-05 2023-01-24 00:00:00.626860: step: 184/77, loss: 2.8312191702184464e-08 2023-01-24 00:00:01.977634: step: 188/77, loss: 0.0007012022542767227 2023-01-24 00:00:03.247520: step: 192/77, loss: 4.4859907575300895e-06 2023-01-24 00:00:04.520408: step: 196/77, loss: 0.011081600561738014 2023-01-24 00:00:05.854902: step: 200/77, loss: 0.0005675565917044878 2023-01-24 00:00:07.124391: step: 204/77, loss: 0.0019754134118556976 2023-01-24 00:00:08.408119: step: 208/77, loss: 0.0015090866945683956 2023-01-24 00:00:09.708485: step: 212/77, loss: 2.6964096832671203e-05 2023-01-24 00:00:10.989287: step: 216/77, loss: 4.87226225232007e-06 2023-01-24 00:00:12.265281: step: 220/77, loss: 4.604378318617819e-07 2023-01-24 00:00:13.568417: step: 224/77, loss: 2.056343504364122e-07 2023-01-24 00:00:14.827773: step: 228/77, loss: 3.175208257744089e-05 2023-01-24 00:00:16.121771: step: 232/77, loss: 0.0016991746379062533 2023-01-24 00:00:17.386138: step: 236/77, loss: 1.1633360372798052e-05 2023-01-24 00:00:18.676056: step: 240/77, loss: 1.2814975036690157e-07 2023-01-24 00:00:19.933012: step: 244/77, loss: 1.141123993875226e-05 2023-01-24 00:00:21.202898: step: 248/77, loss: 0.0002870917378459126 2023-01-24 00:00:22.490568: step: 252/77, loss: 0.004951559007167816 2023-01-24 00:00:23.760869: step: 256/77, loss: 7.301561311123805e-08 2023-01-24 00:00:25.058757: step: 260/77, loss: 5.036663424107246e-05 2023-01-24 00:00:26.340295: step: 264/77, loss: 0.0 2023-01-24 00:00:27.624715: step: 268/77, loss: 9.910161679727025e-06 2023-01-24 00:00:28.893830: step: 272/77, loss: 0.00963597558438778 2023-01-24 00:00:30.193215: step: 276/77, loss: 0.00782719161361456 2023-01-24 00:00:31.471425: step: 280/77, loss: 8.329643037541246e-07 2023-01-24 00:00:32.763620: step: 284/77, loss: 0.00023496760695707053 2023-01-24 00:00:34.012728: step: 288/77, loss: 0.0019654352217912674 2023-01-24 00:00:35.317352: step: 292/77, loss: 5.9508392951102e-06 2023-01-24 00:00:36.592764: step: 296/77, loss: 1.2933653579239035e-06 2023-01-24 00:00:37.893358: step: 300/77, loss: 1.749242983350996e-05 2023-01-24 00:00:39.138497: step: 304/77, loss: 0.0001417692838003859 2023-01-24 00:00:40.420811: step: 308/77, loss: 0.0005447689909487963 2023-01-24 00:00:41.724425: step: 312/77, loss: 0.0003403635055292398 2023-01-24 00:00:43.005085: step: 316/77, loss: 1.9222362368509494e-07 2023-01-24 00:00:44.303428: step: 320/77, loss: 3.882642431562999e-06 2023-01-24 00:00:45.601858: step: 324/77, loss: 0.001855060108937323 2023-01-24 00:00:46.835025: step: 328/77, loss: 0.00011588455527089536 2023-01-24 00:00:48.130022: step: 332/77, loss: 2.5062854547286406e-05 2023-01-24 00:00:49.400350: step: 336/77, loss: 4.14246784430361e-07 2023-01-24 00:00:50.734949: step: 340/77, loss: 1.247184968633519e-06 2023-01-24 00:00:52.021133: step: 344/77, loss: 0.00025223763077519834 2023-01-24 00:00:53.266593: step: 348/77, loss: 8.014580089366063e-05 2023-01-24 00:00:54.569637: step: 352/77, loss: 1.4408777815333451e-06 2023-01-24 00:00:55.864770: step: 356/77, loss: 0.0001089065262931399 2023-01-24 00:00:57.117403: step: 360/77, loss: 0.000887332484126091 2023-01-24 00:00:58.451792: step: 364/77, loss: 2.2855980205349624e-05 2023-01-24 00:00:59.719064: step: 368/77, 
loss: 0.00034089345717802644 2023-01-24 00:01:00.997398: step: 372/77, loss: 0.000578397186473012 2023-01-24 00:01:02.261207: step: 376/77, loss: 0.012201877310872078 2023-01-24 00:01:03.584498: step: 380/77, loss: 0.0033788299188017845 2023-01-24 00:01:04.854662: step: 384/77, loss: 0.0006222074152901769 2023-01-24 00:01:06.127420: step: 388/77, loss: 7.241784487632685e-07 ================================================== Loss: 0.002 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28} Test Chinese: {'template': {'p': 0.9342105263157895, 'r': 0.5590551181102362, 'f1': 0.6995073891625616}, 'slot': {'p': 0.5434782608695652, 'r': 0.02162629757785467, 'f1': 0.041597337770382686}, 'combined': 0.029097645139873604, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28} Test Korean: {'template': {'p': 0.9324324324324325, 'r': 0.5433070866141733, 'f1': 0.6865671641791046}, 'slot': {'p': 0.5434782608695652, 'r': 0.02162629757785467, 'f1': 0.041597337770382686}, 'combined': 0.028559366230411998, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28} Test Russian: {'template': {'p': 0.9324324324324325, 'r': 0.5433070866141733, 'f1': 0.6865671641791046}, 'slot': {'p': 0.5319148936170213, 'r': 0.02162629757785467, 'f1': 0.04156275976724854}, 'combined': 0.02853562610885721, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 28} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 
0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 29 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:02:47.765625: step: 4/77, loss: 8.651711141283158e-06 2023-01-24 00:02:49.091620: step: 8/77, loss: 1.0613646736601368e-05 2023-01-24 00:02:50.384464: step: 12/77, loss: 1.1698984053509776e-05 2023-01-24 00:02:51.671848: step: 16/77, loss: 6.8453396124823485e-06 2023-01-24 00:02:52.976417: step: 20/77, loss: 0.058387961238622665 2023-01-24 00:02:54.253926: step: 24/77, loss: 2.6822082332955688e-08 2023-01-24 00:02:55.478679: step: 28/77, loss: 2.282654349983204e-06 2023-01-24 00:02:56.760831: step: 32/77, loss: 0.004614799749106169 2023-01-24 00:02:58.038669: step: 36/77, loss: 6.8609083427872974e-06 2023-01-24 00:02:59.289144: step: 40/77, loss: 0.0008533053332939744 2023-01-24 00:03:00.573508: step: 44/77, loss: 0.0052756210789084435 2023-01-24 00:03:01.852299: step: 48/77, loss: 3.4868492093664827e-07 2023-01-24 00:03:03.126487: step: 52/77, loss: 3.491688403300941e-05 2023-01-24 00:03:04.395360: step: 56/77, loss: 0.006140722893178463 2023-01-24 00:03:05.659986: step: 60/77, loss: 3.100781759712845e-05 2023-01-24 00:03:06.938404: step: 64/77, loss: 3.0994272037787596e-07 2023-01-24 00:03:08.179418: step: 68/77, loss: 0.0008896092767827213 2023-01-24 00:03:09.485872: step: 72/77, loss: 0.009300212375819683 2023-01-24 00:03:10.762215: step: 76/77, loss: 0.0004132104222662747 2023-01-24 00:03:12.059843: step: 80/77, loss: 6.638202648900915e-06 2023-01-24 00:03:13.329193: step: 84/77, loss: 0.0011040932731702924 2023-01-24 00:03:14.632386: step: 88/77, loss: 1.318868908128934e-05 2023-01-24 00:03:15.933514: step: 92/77, loss: 4.4553686961990024e-07 2023-01-24 00:03:17.214259: step: 96/77, loss: 2.3648008209420368e-05 2023-01-24 00:03:18.540096: step: 100/77, loss: 0.00017289724200963974 2023-01-24 00:03:19.842959: step: 104/77, loss: 0.0002529393823351711 2023-01-24 00:03:21.145825: step: 108/77, loss: 9.019220669870265e-06 2023-01-24 00:03:22.410988: step: 112/77, loss: 5.823463652632199e-05 2023-01-24 00:03:23.679019: step: 116/77, loss: 9.745178886078065e-07 2023-01-24 00:03:24.940106: step: 120/77, loss: 4.057395926793106e-06 2023-01-24 00:03:26.221135: step: 124/77, loss: 0.0417637974023819 2023-01-24 00:03:27.509773: step: 128/77, loss: 4.908620030619204e-05 2023-01-24 00:03:28.798107: step: 132/77, loss: 2.5956498575396836e-05 2023-01-24 00:03:30.067211: step: 136/77, loss: 0.0 2023-01-24 00:03:31.353567: step: 140/77, loss: 1.0787980500026606e-06 2023-01-24 00:03:32.621075: step: 144/77, loss: 7.003539082006682e-08 2023-01-24 00:03:33.928783: step: 148/77, loss: 
2.9099076073180186e-06 2023-01-24 00:03:35.241446: step: 152/77, loss: 7.658990739400906e-07 2023-01-24 00:03:36.604750: step: 156/77, loss: 0.0002760235802270472 2023-01-24 00:03:37.878580: step: 160/77, loss: 2.2328616978484206e-05 2023-01-24 00:03:39.157389: step: 164/77, loss: 1.5079613149282522e-06 2023-01-24 00:03:40.433267: step: 168/77, loss: 7.09287860445329e-07 2023-01-24 00:03:41.734203: step: 172/77, loss: 6.417190888896585e-05 2023-01-24 00:03:43.048114: step: 176/77, loss: 1.533610702608712e-05 2023-01-24 00:03:44.347374: step: 180/77, loss: 0.00015153044660110027 2023-01-24 00:03:45.611487: step: 184/77, loss: 0.00015654291200917214 2023-01-24 00:03:46.906597: step: 188/77, loss: 2.3394709103286004e-07 2023-01-24 00:03:48.165626: step: 192/77, loss: 0.0001352376857539639 2023-01-24 00:03:49.509520: step: 196/77, loss: 3.8812271668575704e-05 2023-01-24 00:03:50.782503: step: 200/77, loss: 0.41700997948646545 2023-01-24 00:03:52.070288: step: 204/77, loss: 9.223648476108792e-07 2023-01-24 00:03:53.352326: step: 208/77, loss: 0.0008059104438871145 2023-01-24 00:03:54.663240: step: 212/77, loss: 4.970795998815447e-05 2023-01-24 00:03:55.912238: step: 216/77, loss: 0.0023068408481776714 2023-01-24 00:03:57.220783: step: 220/77, loss: 0.011769354343414307 2023-01-24 00:03:58.504845: step: 224/77, loss: 1.411839184584096e-05 2023-01-24 00:03:59.761772: step: 228/77, loss: 0.0033866390585899353 2023-01-24 00:04:01.054590: step: 232/77, loss: 0.0005937239620834589 2023-01-24 00:04:02.367544: step: 236/77, loss: 1.1905756309715798e-06 2023-01-24 00:04:03.687508: step: 240/77, loss: 0.05240585654973984 2023-01-24 00:04:04.980636: step: 244/77, loss: 8.177459676517174e-05 2023-01-24 00:04:06.282468: step: 248/77, loss: 0.03990630805492401 2023-01-24 00:04:07.546785: step: 252/77, loss: 0.009225370362401009 2023-01-24 00:04:08.833327: step: 256/77, loss: 0.004749360494315624 2023-01-24 00:04:10.156063: step: 260/77, loss: 0.04941349849104881 2023-01-24 00:04:11.469791: step: 264/77, loss: 0.0037642589304596186 2023-01-24 00:04:12.726219: step: 268/77, loss: 5.3255724196787924e-05 2023-01-24 00:04:14.012546: step: 272/77, loss: 1.9163348042638972e-05 2023-01-24 00:04:15.304947: step: 276/77, loss: 2.8681495223281672e-06 2023-01-24 00:04:16.650783: step: 280/77, loss: 2.086154324842937e-07 2023-01-24 00:04:17.968815: step: 284/77, loss: 3.7699453514505876e-07 2023-01-24 00:04:19.257030: step: 288/77, loss: 2.2428501324611716e-05 2023-01-24 00:04:20.541218: step: 292/77, loss: 0.0074419789016246796 2023-01-24 00:04:21.819204: step: 296/77, loss: 3.7662855902453884e-05 2023-01-24 00:04:23.077037: step: 300/77, loss: 1.2514733498392161e-05 2023-01-24 00:04:24.354579: step: 304/77, loss: 0.0006708307191729546 2023-01-24 00:04:25.646961: step: 308/77, loss: 0.00016213061462622136 2023-01-24 00:04:26.927096: step: 312/77, loss: 0.017585409805178642 2023-01-24 00:04:28.201891: step: 316/77, loss: 0.0018693305319175124 2023-01-24 00:04:29.439792: step: 320/77, loss: 2.6822082332955688e-08 2023-01-24 00:04:30.745889: step: 324/77, loss: 2.0861621763401672e-08 2023-01-24 00:04:32.083575: step: 328/77, loss: 0.0006003840826451778 2023-01-24 00:04:33.385539: step: 332/77, loss: 0.0033928346820175648 2023-01-24 00:04:34.644461: step: 336/77, loss: 1.1474237908259965e-05 2023-01-24 00:04:35.939840: step: 340/77, loss: 3.406353062018752e-05 2023-01-24 00:04:37.221284: step: 344/77, loss: 1.2427079809640418e-06 2023-01-24 00:04:38.482578: step: 348/77, loss: 0.0001311218657065183 2023-01-24 
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 29}
Test Chinese: {'template': {'p': 0.9146341463414634, 'r': 0.5905511811023622, 'f1': 0.7177033492822966}, 'slot': {'p': 0.5652173913043478, 'r': 0.02249134948096886, 'f1': 0.04326123128119801}, 'combined': 0.031048730584591868, 'epoch': 29}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 29}
Test Korean: {'template': {'p': 0.9146341463414634, 'r': 0.5905511811023622, 'f1': 0.7177033492822966}, 'slot': {'p': 0.5531914893617021, 'r': 0.02249134948096886, 'f1': 0.04322527015793849}, 'combined': 0.03102292116598456, 'epoch': 29}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 29}
Test Russian: {'template': {'p': 0.9146341463414634, 'r': 0.5905511811023622, 'f1': 0.7177033492822966}, 'slot': {'p': 0.5652173913043478, 'r': 0.02249134948096886, 'f1': 0.04326123128119801}, 'combined': 0.031048730584591868, 'epoch': 29}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 29}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 29}
==================================================
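The "Current best result" block that follows still reports epoch 2 even at epoch 29, i.e. the dev 'combined' score has not improved since then. A minimal sketch of how such a block could be maintained, assuming the best epoch per language is chosen by dev 'combined' (names hypothetical, not from train.py):

    best = {}  # language -> best scores seen so far

    def update_best(language, dev, test, sample):
        """Keep the epoch with the highest dev 'combined' score per language."""
        if language not in best or dev['combined'] > best[language]['dev']['combined']:
            best[language] = {'dev': dev, 'test': test, 'sample': sample}

    # After each epoch's evaluation, e.g.:
    # update_best('Chinese', dev_chinese, test_chinese, sample_chinese)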
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2}
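To work with logs in this format downstream (plotting metric curves, picking the best epoch), a small parser sketch; the regex assumes the "Dev/Test/Sample <Language>: {...}" line format seen above and is not part of train.py.

    import ast
    import re

    METRIC_RE = re.compile(
        r"(Dev|Test|Sample)(?: for)? (Chinese|Korean|Russian): (\{.*?'epoch': \d+\})"
    )

    def parse_metrics(log_text):
        """Yield (split, language, metrics_dict) for every metric line in the log."""
        for split, language, blob in METRIC_RE.findall(log_text):
            yield split, language, ast.literal_eval(blob)

    # Example: best test 'combined' for Korean across the whole log
    # max((m for s, l, m in parse_metrics(open('train.log').read())
    #      if s == 'Test' and l == 'Korean'),
    #     key=lambda m: m['combined'])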