Command that produces this log: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 ---------------------------------------------------------------------------------------------------- > trainable params: >>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
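Each of the 24 encoder layers repeats the same 16 tensors listed here (query/key/value/output projections, two LayerNorms, and the 1024/4096 feed-forward pair), so the per-layer contribution can be read straight off the printed shapes. A quick arithmetic check over those shapes, not part of train.py:

h, ffn = 1024, 4096
attention = 4 * (h * h + h)                       # q/k/v/output dense: 4,198,400
layer_norms = 2 * 2 * h                           # two LayerNorms (weight + bias): 4,096
feed_forward = (ffn * h + ffn) + (h * ffn + h)    # intermediate + output dense: 8,393,728
print(attention + layer_norms + feed_forward)     # 12,596,224 per layer; x24 layers = 302,309,376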
xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 
1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> 
xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: 
torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> trans_rep.weight: torch.Size([1024, 2048]) >>> trans_rep.bias: torch.Size([1024]) >>> hidden_ffns.Corruplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Corruplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Cybercrimeplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Cybercrimeplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Disasterplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Disasterplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Displacementplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Displacementplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Epidemiplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Epidemiplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Etiplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Etiplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Protestplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Protestplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Terrorplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Terrorplate.layers.0.bias: torch.Size([768]) >>> template_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Etiplate.layers.0.weight: 
torch.Size([450, 768]) >>> template_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) >>> type_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Corruplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Corruplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Disasterplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Disasterplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Displacementplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Displacementplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Epidemiplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Epidemiplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Etiplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Etiplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Protestplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Protestplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Terrorplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Terrorplate.layers.1.bias: torch.Size([6]) >>> completion_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Corruplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Corruplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> 
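The head parameters in this listing follow one pattern per template: hidden_ffns projects the 1024-dim XLM-R representation down to 768, and each task-specific classifier is a 768 -> 450 -> k stack, where 450 comes from --event_hidden_num and k is 2 (template, overtime, coordinated), 6 (type) or 3 (completion). A minimal sketch of modules that would yield parameter names and shapes like these; the actual classes in train.py, including their activation functions, may differ:

import torch.nn as nn

TEMPLATES = ["Corruplate", "Cybercrimeplate", "Disasterplate", "Displacementplate",
             "Epidemiplate", "Etiplate", "Protestplate", "Terrorplate"]

class MLP(nn.Module):
    # Linear layers registered under `layers`, so parameters are named
    # layers.0.weight, layers.1.weight, ... as in the printout.
    def __init__(self, dims):
        super().__init__()
        self.layers = nn.ModuleList([nn.Linear(a, b) for a, b in zip(dims, dims[1:])])
    def forward(self, x):
        for i, layer in enumerate(self.layers):
            x = layer(x)
            if i < len(self.layers) - 1:
                x = x.relu()    # hidden activation; the real choice in train.py is a guess
        return x

hidden_ffns = nn.ModuleDict({t: MLP([1024, 768]) for t in TEMPLATES})             # 1024 -> 768
template_classifiers = nn.ModuleDict({t: MLP([768, 450, 2]) for t in TEMPLATES})  # binary
type_classifiers = nn.ModuleDict({t: MLP([768, 450, 6]) for t in TEMPLATES})      # 6-way
completion_classifiers = nn.ModuleDict({t: MLP([768, 450, 3]) for t in TEMPLATES})
overtime_classifiers = nn.ModuleDict({t: MLP([768, 450, 2]) for t in TEMPLATES})
coordinated_classifiers = nn.ModuleDict({t: MLP([768, 450, 2]) for t in TEMPLATES})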
completion_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Disasterplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Disasterplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Displacementplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Displacementplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Epidemiplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Epidemiplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Etiplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Etiplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Protestplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Protestplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Terrorplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Terrorplate.layers.1.bias: torch.Size([3]) >>> overtime_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> 
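The n_trainable_params figure reported at the end of this listing, 582,182,328 (with 0 non-trainable), is consistent with the printed shapes: 256,531,456 for the embeddings, 302,309,376 for the 24 encoder layers, 1,049,600 for the pooler, and 22,291,896 for trans_rep plus the per-template heads. A dump in this format is typically produced by iterating over named_parameters(); a minimal sketch of what train.py presumably does:

def print_trainable_params(model):
    # Mirrors the listing format above: one ">>> name: shape" line per trainable tensor,
    # followed by the trainable / non-trainable totals.
    n_trainable = n_frozen = 0
    print("> trainable params:")
    for name, param in model.named_parameters():
        if param.requires_grad:
            print(f">>> {name}: {param.size()}")
            n_trainable += param.numel()
        else:
            n_frozen += param.numel()
    print(f"n_trainable_params: {n_trainable}, n_nontrainable_params: {n_frozen}")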
overtime_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) n_trainable_params: 582182328, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 
--xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:05:53.086743: step: 4/77, loss: 1.005326271057129 2023-01-23 22:05:54.370133: step: 8/77, loss: 0.9969702959060669 2023-01-23 22:05:55.609006: step: 12/77, loss: 0.9919466376304626 2023-01-23 22:05:56.846441: step: 16/77, loss: 0.9922894239425659 2023-01-23 22:05:58.103668: step: 20/77, loss: 0.987234354019165 2023-01-23 22:05:59.376892: step: 24/77, loss: 0.995108962059021 2023-01-23 22:06:00.678558: step: 28/77, loss: 0.9919507503509521 2023-01-23 22:06:01.960352: step: 32/77, loss: 0.9804954528808594 2023-01-23 22:06:03.243872: step: 36/77, loss: 0.9758777618408203 2023-01-23 22:06:04.514934: step: 40/77, loss: 0.9724856615066528 2023-01-23 22:06:05.831368: step: 44/77, loss: 0.9727693796157837 2023-01-23 22:06:07.103774: step: 48/77, loss: 0.9606623649597168 2023-01-23 22:06:08.364988: step: 52/77, loss: 0.9492292404174805 2023-01-23 22:06:09.672316: step: 56/77, loss: 0.9366544485092163 2023-01-23 22:06:10.966208: step: 60/77, loss: 0.9421094655990601 2023-01-23 22:06:12.255649: step: 64/77, loss: 0.914215624332428 2023-01-23 22:06:13.550945: step: 68/77, loss: 0.9166408777236938 2023-01-23 22:06:14.819848: step: 72/77, loss: 0.8890451788902283 2023-01-23 22:06:16.088229: step: 76/77, loss: 0.888709306716919 2023-01-23 22:06:17.355692: step: 80/77, loss: 0.8704376220703125 2023-01-23 22:06:18.647427: step: 84/77, loss: 0.8614633083343506 2023-01-23 22:06:19.952225: step: 88/77, loss: 0.8560481071472168 2023-01-23 22:06:21.235638: step: 92/77, loss: 0.8408702611923218 2023-01-23 22:06:22.495238: step: 96/77, loss: 0.8011143803596497 2023-01-23 22:06:23.761295: step: 100/77, loss: 0.8109110593795776 2023-01-23 22:06:25.053492: step: 104/77, loss: 0.7847245335578918 2023-01-23 22:06:26.346932: step: 108/77, loss: 0.7961324453353882 2023-01-23 22:06:27.617736: step: 112/77, loss: 0.7659816145896912 2023-01-23 22:06:28.898355: step: 116/77, loss: 0.7412824034690857 2023-01-23 22:06:30.220025: step: 120/77, loss: 0.7645125985145569 2023-01-23 22:06:31.466484: step: 124/77, loss: 0.7162237167358398 2023-01-23 22:06:32.764936: step: 128/77, loss: 0.6767681837081909 2023-01-23 22:06:34.159246: step: 132/77, loss: 0.6699859499931335 2023-01-23 22:06:35.424259: step: 136/77, loss: 0.6623581051826477 2023-01-23 22:06:36.730119: step: 140/77, loss: 0.5981768369674683 2023-01-23 22:06:38.037051: step: 144/77, loss: 0.5953983068466187 2023-01-23 22:06:39.320883: step: 148/77, loss: 0.5925966501235962 2023-01-23 22:06:40.575936: step: 152/77, loss: 0.5283046364784241 2023-01-23 22:06:41.894160: step: 156/77, loss: 0.5977451205253601 2023-01-23 22:06:43.217639: step: 160/77, loss: 0.5185505747795105 2023-01-23 22:06:44.575420: step: 164/77, loss: 0.48585712909698486 2023-01-23 22:06:45.905510: step: 168/77, loss: 0.48655766248703003 2023-01-23 22:06:47.172417: step: 172/77, loss: 0.4535609185695648 2023-01-23 22:06:48.457399: step: 176/77, loss: 0.4237361252307892 2023-01-23 22:06:49.790095: step: 180/77, loss: 0.4515753388404846 2023-01-23 22:06:51.024350: step: 184/77, loss: 0.3867449164390564 2023-01-23 22:06:52.257777: step: 188/77, loss: 0.34473663568496704 2023-01-23 22:06:53.561930: step: 192/77, loss: 0.32582467794418335 2023-01-23 22:06:54.869077: step: 196/77, loss: 0.3416479825973511 2023-01-23 22:06:56.186392: step: 200/77, loss: 0.38761529326438904 2023-01-23 22:06:57.480582: step: 204/77, loss: 0.33058345317840576 2023-01-23 22:06:58.791992: step: 
208/77, loss: 0.30359816551208496 2023-01-23 22:07:00.100629: step: 212/77, loss: 0.23331186175346375 2023-01-23 22:07:01.410139: step: 216/77, loss: 0.2237580269575119 2023-01-23 22:07:02.757312: step: 220/77, loss: 0.1935083121061325 2023-01-23 22:07:04.101540: step: 224/77, loss: 0.20559267699718475 2023-01-23 22:07:05.388722: step: 228/77, loss: 0.2811965048313141 2023-01-23 22:07:06.664689: step: 232/77, loss: 0.1479192078113556 2023-01-23 22:07:07.996655: step: 236/77, loss: 0.1190163865685463 2023-01-23 22:07:09.309755: step: 240/77, loss: 0.08200334012508392 2023-01-23 22:07:10.617661: step: 244/77, loss: 0.10144772380590439 2023-01-23 22:07:11.909076: step: 248/77, loss: 0.1776786744594574 2023-01-23 22:07:13.227885: step: 252/77, loss: 0.09870608150959015 2023-01-23 22:07:14.497070: step: 256/77, loss: 0.07344207912683487 2023-01-23 22:07:15.791590: step: 260/77, loss: 0.13493704795837402 2023-01-23 22:07:17.067411: step: 264/77, loss: 0.112431600689888 2023-01-23 22:07:18.355222: step: 268/77, loss: 0.10087061673402786 2023-01-23 22:07:19.643528: step: 272/77, loss: 0.07023502141237259 2023-01-23 22:07:20.978358: step: 276/77, loss: 0.11176516115665436 2023-01-23 22:07:22.307798: step: 280/77, loss: 0.08656412363052368 2023-01-23 22:07:23.610946: step: 284/77, loss: 0.09016451239585876 2023-01-23 22:07:24.926267: step: 288/77, loss: 0.20893847942352295 2023-01-23 22:07:26.204668: step: 292/77, loss: 0.08841076493263245 2023-01-23 22:07:27.491729: step: 296/77, loss: 0.20717525482177734 2023-01-23 22:07:28.761060: step: 300/77, loss: 0.14878374338150024 2023-01-23 22:07:30.078275: step: 304/77, loss: 0.16760967671871185 2023-01-23 22:07:31.369506: step: 308/77, loss: 0.0642714574933052 2023-01-23 22:07:32.672171: step: 312/77, loss: 0.10876336693763733 2023-01-23 22:07:33.937668: step: 316/77, loss: 0.06730751693248749 2023-01-23 22:07:35.283972: step: 320/77, loss: 0.0801793783903122 2023-01-23 22:07:36.550140: step: 324/77, loss: 0.14890411496162415 2023-01-23 22:07:37.878829: step: 328/77, loss: 0.19140523672103882 2023-01-23 22:07:39.124298: step: 332/77, loss: 0.12841562926769257 2023-01-23 22:07:40.412872: step: 336/77, loss: 0.06181987002491951 2023-01-23 22:07:41.700385: step: 340/77, loss: 0.07434514164924622 2023-01-23 22:07:42.983601: step: 344/77, loss: 0.058149345219135284 2023-01-23 22:07:44.336015: step: 348/77, loss: 0.12842099368572235 2023-01-23 22:07:45.669828: step: 352/77, loss: 0.06279709935188293 2023-01-23 22:07:46.947281: step: 356/77, loss: 0.10988639295101166 2023-01-23 22:07:48.249565: step: 360/77, loss: 0.059507448226213455 2023-01-23 22:07:49.546013: step: 364/77, loss: 0.07639916241168976 2023-01-23 22:07:50.798290: step: 368/77, loss: 0.07576992362737656 2023-01-23 22:07:52.074330: step: 372/77, loss: 0.11858648806810379 2023-01-23 22:07:53.362485: step: 376/77, loss: 0.16270530223846436 2023-01-23 22:07:54.677516: step: 380/77, loss: 0.10745513439178467 2023-01-23 22:07:55.975409: step: 384/77, loss: 0.07845322042703629 2023-01-23 22:07:57.228098: step: 388/77, loss: 0.0594959557056427 ================================================== Loss: 0.449 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 
0} Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:10:06.374320: step: 4/77, loss: 0.07319396734237671 2023-01-23 22:10:07.632608: step: 8/77, loss: 0.2127627432346344 2023-01-23 22:10:08.898968: step: 12/77, loss: 0.06945189833641052 2023-01-23 22:10:10.166357: step: 16/77, loss: 0.06614106148481369 2023-01-23 22:10:11.455577: step: 20/77, loss: 0.12928959727287292 2023-01-23 22:10:12.732001: step: 24/77, loss: 0.06803569197654724 2023-01-23 22:10:14.042454: step: 28/77, loss: 0.11719777435064316 2023-01-23 22:10:15.303270: step: 32/77, loss: 0.08039490878582001 2023-01-23 22:10:16.599712: step: 36/77, loss: 0.08408656716346741 2023-01-23 22:10:17.874273: step: 40/77, loss: 0.16345737874507904 2023-01-23 22:10:19.191557: step: 44/77, loss: 0.16177189350128174 2023-01-23 22:10:20.445477: step: 48/77, loss: 0.06657033413648605 2023-01-23 22:10:21.737729: step: 52/77, loss: 0.11178885400295258 2023-01-23 22:10:23.018611: step: 56/77, loss: 0.17989614605903625 2023-01-23 22:10:24.302749: step: 60/77, loss: 0.11134673655033112 2023-01-23 22:10:25.614698: step: 64/77, loss: 0.1450967788696289 2023-01-23 22:10:26.905352: step: 68/77, loss: 
0.06846902519464493 2023-01-23 22:10:28.160573: step: 72/77, loss: 0.09169869869947433 2023-01-23 22:10:29.477519: step: 76/77, loss: 0.17338258028030396 2023-01-23 22:10:30.746412: step: 80/77, loss: 0.06149708479642868 2023-01-23 22:10:32.050220: step: 84/77, loss: 0.03807852417230606 2023-01-23 22:10:33.373783: step: 88/77, loss: 0.13195359706878662 2023-01-23 22:10:34.643027: step: 92/77, loss: 0.13806062936782837 2023-01-23 22:10:35.920128: step: 96/77, loss: 0.07052130997180939 2023-01-23 22:10:37.183370: step: 100/77, loss: 0.07880409061908722 2023-01-23 22:10:38.476478: step: 104/77, loss: 0.11023557186126709 2023-01-23 22:10:39.765059: step: 108/77, loss: 0.054397180676460266 2023-01-23 22:10:41.083813: step: 112/77, loss: 0.055380046367645264 2023-01-23 22:10:42.359180: step: 116/77, loss: 0.029421858489513397 2023-01-23 22:10:43.611071: step: 120/77, loss: 0.13612958788871765 2023-01-23 22:10:44.885778: step: 124/77, loss: 0.055370356887578964 2023-01-23 22:10:46.149003: step: 128/77, loss: 0.09766070544719696 2023-01-23 22:10:47.420492: step: 132/77, loss: 0.25343966484069824 2023-01-23 22:10:48.730558: step: 136/77, loss: 0.20380039513111115 2023-01-23 22:10:50.009544: step: 140/77, loss: 0.08381229639053345 2023-01-23 22:10:51.244589: step: 144/77, loss: 0.08572602272033691 2023-01-23 22:10:52.579376: step: 148/77, loss: 0.09957505017518997 2023-01-23 22:10:53.864340: step: 152/77, loss: 0.1330590844154358 2023-01-23 22:10:55.157353: step: 156/77, loss: 0.05906382203102112 2023-01-23 22:10:56.432709: step: 160/77, loss: 0.061597198247909546 2023-01-23 22:10:57.730392: step: 164/77, loss: 0.09688493609428406 2023-01-23 22:10:59.023706: step: 168/77, loss: 0.10050778836011887 2023-01-23 22:11:00.309924: step: 172/77, loss: 0.0780385434627533 2023-01-23 22:11:01.603496: step: 176/77, loss: 0.10098770260810852 2023-01-23 22:11:02.841917: step: 180/77, loss: 0.0765882357954979 2023-01-23 22:11:04.181995: step: 184/77, loss: 0.08834411203861237 2023-01-23 22:11:05.482957: step: 188/77, loss: 0.05788188427686691 2023-01-23 22:11:06.776537: step: 192/77, loss: 0.0765821561217308 2023-01-23 22:11:08.104564: step: 196/77, loss: 0.08416645228862762 2023-01-23 22:11:09.370712: step: 200/77, loss: 0.028178736567497253 2023-01-23 22:11:10.656093: step: 204/77, loss: 0.1760944128036499 2023-01-23 22:11:11.948073: step: 208/77, loss: 0.048954010009765625 2023-01-23 22:11:13.218442: step: 212/77, loss: 0.05932794138789177 2023-01-23 22:11:14.498296: step: 216/77, loss: 0.04491445794701576 2023-01-23 22:11:15.837900: step: 220/77, loss: 0.1340390145778656 2023-01-23 22:11:17.097221: step: 224/77, loss: 0.03974044695496559 2023-01-23 22:11:18.382444: step: 228/77, loss: 0.08148594945669174 2023-01-23 22:11:19.725441: step: 232/77, loss: 0.17769792675971985 2023-01-23 22:11:21.014033: step: 236/77, loss: 0.05620008334517479 2023-01-23 22:11:22.320439: step: 240/77, loss: 0.07260948419570923 2023-01-23 22:11:23.610219: step: 244/77, loss: 0.030221108347177505 2023-01-23 22:11:24.916171: step: 248/77, loss: 0.17984890937805176 2023-01-23 22:11:26.215712: step: 252/77, loss: 0.029455162584781647 2023-01-23 22:11:27.466789: step: 256/77, loss: 0.09766645729541779 2023-01-23 22:11:28.784290: step: 260/77, loss: 0.13616687059402466 2023-01-23 22:11:30.106896: step: 264/77, loss: 0.3544989228248596 2023-01-23 22:11:31.400500: step: 268/77, loss: 0.0440225750207901 2023-01-23 22:11:32.713103: step: 272/77, loss: 0.04961168020963669 2023-01-23 22:11:34.028005: step: 276/77, loss: 0.03362132981419563 
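Loss lines in this stream appear once every four mini-batches, matching --accumulate_step 4 with --batch_size 10, and the run uses a smaller learning rate for the XLM-R encoder (--xlmr_learning_rate 2e-5) than for the task heads (--learning_rate 2e-4). A rough sketch of such an accumulation loop; names like model and loader, the optimizer choice, and the exact step counter (the '/77' denominator above) are assumptions, not the actual train.py code:

import datetime
import torch

def train_epoch(model, loader, accumulate_step=4, xlmr_lr=2e-5, head_lr=2e-4):
    # Separate parameter groups: small LR for the pretrained encoder, larger LR for the heads.
    optimizer = torch.optim.AdamW([
        {"params": [p for n, p in model.named_parameters() if n.startswith("xlmr.")], "lr": xlmr_lr},
        {"params": [p for n, p in model.named_parameters() if not n.startswith("xlmr.")], "lr": head_lr},
    ])
    optimizer.zero_grad()
    losses = []
    for i, batch in enumerate(loader, start=1):
        loss = model(**batch)                  # assume the model returns its training loss
        (loss / accumulate_step).backward()    # scale so accumulated gradients average out
        losses.append(loss.item())
        if i % accumulate_step == 0:           # one optimizer update per accumulation cycle
            optimizer.step()
            optimizer.zero_grad()
            print(f"{datetime.datetime.now()}: step: {i}/{len(loader)}, loss: {losses[-1]}")
    print(f"Loss: {sum(losses) / len(losses):.3f}")   # per-epoch summary line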
2023-01-23 22:11:35.351340: step: 280/77, loss: 0.06177300959825516 2023-01-23 22:11:36.679475: step: 284/77, loss: 0.09610987454652786 2023-01-23 22:11:37.996446: step: 288/77, loss: 0.10167451202869415 2023-01-23 22:11:39.338751: step: 292/77, loss: 0.07030784338712692 2023-01-23 22:11:40.627518: step: 296/77, loss: 0.12148582190275192 2023-01-23 22:11:41.952603: step: 300/77, loss: 0.11119399964809418 2023-01-23 22:11:43.239322: step: 304/77, loss: 0.0914122611284256 2023-01-23 22:11:44.508572: step: 308/77, loss: 0.03533243387937546 2023-01-23 22:11:45.832415: step: 312/77, loss: 0.06032838299870491 2023-01-23 22:11:47.145260: step: 316/77, loss: 0.045557308942079544 2023-01-23 22:11:48.403705: step: 320/77, loss: 0.07548585534095764 2023-01-23 22:11:49.697234: step: 324/77, loss: 0.09146370738744736 2023-01-23 22:11:51.009639: step: 328/77, loss: 0.09769196808338165 2023-01-23 22:11:52.294564: step: 332/77, loss: 0.14479124546051025 2023-01-23 22:11:53.565541: step: 336/77, loss: 0.11375805735588074 2023-01-23 22:11:54.897258: step: 340/77, loss: 0.10664797574281693 2023-01-23 22:11:56.201787: step: 344/77, loss: 0.07036581635475159 2023-01-23 22:11:57.516728: step: 348/77, loss: 0.10672543942928314 2023-01-23 22:11:58.755479: step: 352/77, loss: 0.0998711809515953 2023-01-23 22:12:00.071851: step: 356/77, loss: 0.04224449396133423 2023-01-23 22:12:01.367374: step: 360/77, loss: 0.08522751927375793 2023-01-23 22:12:02.706219: step: 364/77, loss: 0.07499724626541138 2023-01-23 22:12:03.966067: step: 368/77, loss: 0.09146278351545334 2023-01-23 22:12:05.287748: step: 372/77, loss: 0.11920887231826782 2023-01-23 22:12:06.602111: step: 376/77, loss: 0.14591817557811737 2023-01-23 22:12:07.854790: step: 380/77, loss: 0.13997526466846466 2023-01-23 22:12:09.158398: step: 384/77, loss: 0.07448755204677582 2023-01-23 22:12:10.424550: step: 388/77, loss: 0.10113322734832764 ================================================== Loss: 0.097 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'template': {'p': 0.0, 'r': 
0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 2 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:13:53.685334: step: 4/77, loss: 0.16292589902877808 2023-01-23 22:13:54.965404: step: 8/77, loss: 0.10868260264396667 2023-01-23 22:13:56.246799: step: 12/77, loss: 0.08619604259729385 2023-01-23 22:13:57.491171: step: 16/77, loss: 0.17940576374530792 2023-01-23 22:13:58.800232: step: 20/77, loss: 0.05281125381588936 2023-01-23 22:14:00.095208: step: 24/77, loss: 0.10502075403928757 2023-01-23 22:14:01.416480: step: 28/77, loss: 0.05390072986483574 2023-01-23 22:14:02.717234: step: 32/77, loss: 0.09048666805028915 2023-01-23 22:14:04.023699: step: 36/77, loss: 0.030864335596561432 2023-01-23 22:14:05.299881: step: 40/77, loss: 0.02246858924627304 2023-01-23 22:14:06.570230: step: 44/77, loss: 0.18404263257980347 2023-01-23 22:14:07.853537: step: 48/77, loss: 0.0281207375228405 2023-01-23 22:14:09.171897: step: 52/77, loss: 0.13492928445339203 2023-01-23 22:14:10.466022: step: 56/77, loss: 0.04565151408314705 2023-01-23 22:14:11.740260: step: 60/77, loss: 0.09567532688379288 2023-01-23 22:14:13.038210: step: 64/77, loss: 0.11220959573984146 2023-01-23 22:14:14.329059: step: 68/77, loss: 0.0386422835290432 2023-01-23 22:14:15.634262: step: 72/77, loss: 0.04394672438502312 2023-01-23 22:14:16.901550: step: 76/77, loss: 0.03471282497048378 2023-01-23 22:14:18.148739: step: 80/77, loss: 0.030496366322040558 2023-01-23 22:14:19.458764: step: 84/77, loss: 0.06462342292070389 2023-01-23 22:14:20.740526: step: 88/77, loss: 0.02463102713227272 2023-01-23 22:14:22.018423: step: 92/77, loss: 0.02454773709177971 2023-01-23 22:14:23.267158: step: 96/77, loss: 0.03570985421538353 2023-01-23 22:14:24.574956: step: 100/77, loss: 0.04813680425286293 2023-01-23 22:14:25.909145: step: 104/77, loss: 0.07638773322105408 2023-01-23 22:14:27.221299: step: 108/77, loss: 0.10734117776155472 2023-01-23 22:14:28.516886: step: 112/77, loss: 0.05339484289288521 2023-01-23 22:14:29.795613: step: 116/77, loss: 0.08813977986574173 2023-01-23 22:14:31.123638: step: 120/77, loss: 0.029325906187295914 2023-01-23 22:14:32.458224: step: 124/77, loss: 0.039254121482372284 2023-01-23 22:14:33.763390: step: 128/77, loss: 0.0848977193236351 2023-01-23 22:14:35.072570: step: 132/77, loss: 0.08072513341903687 2023-01-23 22:14:36.400814: step: 136/77, loss: 0.014954173937439919 2023-01-23 22:14:37.653391: step: 140/77, loss: 0.059734243899583817 2023-01-23 
22:14:38.889416: step: 144/77, loss: 0.00863991491496563 2023-01-23 22:14:40.199084: step: 148/77, loss: 0.020591311156749725 2023-01-23 22:14:41.499872: step: 152/77, loss: 0.12235350906848907 2023-01-23 22:14:42.786589: step: 156/77, loss: 0.0375383123755455 2023-01-23 22:14:44.062004: step: 160/77, loss: 0.028969548642635345 2023-01-23 22:14:45.365482: step: 164/77, loss: 0.031519703567028046 2023-01-23 22:14:46.658422: step: 168/77, loss: 0.06944873929023743 2023-01-23 22:14:47.959171: step: 172/77, loss: 0.015144776552915573 2023-01-23 22:14:49.251871: step: 176/77, loss: 0.018287071958184242 2023-01-23 22:14:50.493828: step: 180/77, loss: 0.1540391743183136 2023-01-23 22:14:51.835984: step: 184/77, loss: 0.07852562516927719 2023-01-23 22:14:53.207792: step: 188/77, loss: 0.03432613983750343 2023-01-23 22:14:54.499085: step: 192/77, loss: 0.023319926112890244 2023-01-23 22:14:55.795764: step: 196/77, loss: 0.00791256595402956 2023-01-23 22:14:57.096807: step: 200/77, loss: 0.008311287499964237 2023-01-23 22:14:58.369250: step: 204/77, loss: 0.031680233776569366 2023-01-23 22:14:59.633298: step: 208/77, loss: 0.033560387790203094 2023-01-23 22:15:00.935679: step: 212/77, loss: 0.22561319172382355 2023-01-23 22:15:02.268557: step: 216/77, loss: 0.02809392288327217 2023-01-23 22:15:03.571977: step: 220/77, loss: 0.01711028814315796 2023-01-23 22:15:04.822550: step: 224/77, loss: 0.04609297588467598 2023-01-23 22:15:06.136614: step: 228/77, loss: 0.16207429766654968 2023-01-23 22:15:07.417793: step: 232/77, loss: 0.04895063489675522 2023-01-23 22:15:08.703714: step: 236/77, loss: 0.1597345918416977 2023-01-23 22:15:09.981328: step: 240/77, loss: 0.08007530868053436 2023-01-23 22:15:11.271600: step: 244/77, loss: 0.0169462151825428 2023-01-23 22:15:12.551837: step: 248/77, loss: 0.01133648119866848 2023-01-23 22:15:13.800327: step: 252/77, loss: 0.018427202478051186 2023-01-23 22:15:15.072094: step: 256/77, loss: 0.018696516752243042 2023-01-23 22:15:16.350358: step: 260/77, loss: 0.07468952238559723 2023-01-23 22:15:17.666697: step: 264/77, loss: 0.07245441526174545 2023-01-23 22:15:19.004887: step: 268/77, loss: 0.00603498425334692 2023-01-23 22:15:20.296878: step: 272/77, loss: 0.015061470679938793 2023-01-23 22:15:21.549247: step: 276/77, loss: 0.0198321845382452 2023-01-23 22:15:22.853629: step: 280/77, loss: 0.020931649953126907 2023-01-23 22:15:24.153536: step: 284/77, loss: 0.0679539144039154 2023-01-23 22:15:25.425380: step: 288/77, loss: 0.07533137500286102 2023-01-23 22:15:26.703307: step: 292/77, loss: 0.07942559570074081 2023-01-23 22:15:27.998287: step: 296/77, loss: 0.05369891971349716 2023-01-23 22:15:29.255795: step: 300/77, loss: 0.11053535342216492 2023-01-23 22:15:30.559865: step: 304/77, loss: 0.015943240374326706 2023-01-23 22:15:31.807208: step: 308/77, loss: 0.0297300573438406 2023-01-23 22:15:33.112902: step: 312/77, loss: 0.005931754130870104 2023-01-23 22:15:34.395186: step: 316/77, loss: 0.03550818935036659 2023-01-23 22:15:35.653238: step: 320/77, loss: 0.026807796210050583 2023-01-23 22:15:36.892833: step: 324/77, loss: 0.0653093159198761 2023-01-23 22:15:38.218816: step: 328/77, loss: 0.1455453783273697 2023-01-23 22:15:39.576092: step: 332/77, loss: 0.039670467376708984 2023-01-23 22:15:40.904174: step: 336/77, loss: 0.0678885355591774 2023-01-23 22:15:42.177083: step: 340/77, loss: 0.016748683527112007 2023-01-23 22:15:43.473725: step: 344/77, loss: 0.023848265409469604 2023-01-23 22:15:44.768178: step: 348/77, loss: 0.02735232189297676 2023-01-23 
22:15:46.118195: step: 352/77, loss: 0.040351394563913345 2023-01-23 22:15:47.379613: step: 356/77, loss: 0.053319547325372696 2023-01-23 22:15:48.668923: step: 360/77, loss: 0.08323514461517334 2023-01-23 22:15:49.921637: step: 364/77, loss: 0.06106017902493477 2023-01-23 22:15:51.231547: step: 368/77, loss: 0.012369946576654911 2023-01-23 22:15:52.549277: step: 372/77, loss: 0.1313581019639969 2023-01-23 22:15:53.843047: step: 376/77, loss: 0.039641156792640686 2023-01-23 22:15:55.104266: step: 380/77, loss: 0.09567482769489288 2023-01-23 22:15:56.403764: step: 384/77, loss: 0.13298000395298004 2023-01-23 22:15:57.691132: step: 388/77, loss: 0.020722243934869766 ================================================== Loss: 0.060 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.2833333333333333, 'f1': 0.4415584415584416}, 'slot': {'p': 0.5, 'r': 0.003780718336483932, 'f1': 0.0075046904315197}, 'combined': 0.0033137594113203874, 'epoch': 2} Test Chinese: {'template': {'p': 0.9538461538461539, 'r': 0.4732824427480916, 'f1': 0.6326530612244898}, 'slot': {'p': 0.42857142857142855, 'r': 0.0027100271002710027, 'f1': 0.005385996409335727}, 'combined': 0.0034074671161103583, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.2833333333333333, 'f1': 0.4415584415584416}, 'slot': {'p': 0.5, 'r': 0.003780718336483932, 'f1': 0.0075046904315197}, 'combined': 0.0033137594113203874, 'epoch': 2} Test Korean: {'template': {'p': 0.953125, 'r': 0.46564885496183206, 'f1': 0.6256410256410256}, 'slot': {'p': 0.42857142857142855, 'r': 0.0027100271002710027, 'f1': 0.005385996409335727}, 'combined': 0.0033697003176356858, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.3, 'f1': 0.4615384615384615}, 'slot': {'p': 0.5, 'r': 0.005671077504725898, 'f1': 0.011214953271028037}, 'combined': 0.005176132278936017, 'epoch': 2} Test Russian: {'template': {'p': 0.9538461538461539, 'r': 0.4732824427480916, 'f1': 0.6326530612244898}, 'slot': {'p': 0.42857142857142855, 'r': 0.0027100271002710027, 'f1': 0.005385996409335727}, 'combined': 0.0034074671161103583, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} New best chinese model... New best korean model... New best russian model... 
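
Note on the evaluation entries above: each language block reports precision ('p'), recall ('r') and F1 for the 'template' and 'slot' sub-tasks, and the logged 'combined' value matches the product of the two F1 scores (for the epoch-2 Dev Chinese entry, 0.4415584... x 0.0075046... = 0.0033137...). The snippet below is a minimal sketch of that arithmetic, assuming the standard F1 definition; the helper names are illustrative and are not taken from train.py.

    # Minimal sketch of how the logged scores appear to fit together.
    # Assumption (inferred from the log, not from train.py):
    #   'combined' = template F1 * slot F1.
    def f1(p: float, r: float) -> float:
        """Standard F1: harmonic mean of precision and recall."""
        return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

    def combined_score(template: dict, slot: dict) -> float:
        """Product of the template and slot F1 scores, as seen in the log."""
        return f1(template['p'], template['r']) * f1(slot['p'], slot['r'])

    # Spot-check against the epoch-2 Dev Chinese entry above.
    dev_chinese = {
        'template': {'p': 1.0, 'r': 0.2833333333333333},
        'slot': {'p': 0.5, 'r': 0.003780718336483932},
    }
    print(combined_score(dev_chinese['template'], dev_chinese['slot']))
    # ~0.00331376, matching the logged 'combined' value for that entry

The same relationship holds for the other Dev/Test/Sample entries in this and later epochs, which makes it easy to spot-check the 'Current best result' blocks that follow.
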
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.2833333333333333, 'f1': 0.4415584415584416}, 'slot': {'p': 0.5, 'r': 0.003780718336483932, 'f1': 0.0075046904315197}, 'combined': 0.0033137594113203874, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9538461538461539, 'r': 0.4732824427480916, 'f1': 0.6326530612244898}, 'slot': {'p': 0.42857142857142855, 'r': 0.0027100271002710027, 'f1': 0.005385996409335727}, 'combined': 0.0034074671161103583, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.2833333333333333, 'f1': 0.4415584415584416}, 'slot': {'p': 0.5, 'r': 0.003780718336483932, 'f1': 0.0075046904315197}, 'combined': 0.0033137594113203874, 'epoch': 2} Test for Korean: {'template': {'p': 0.953125, 'r': 0.46564885496183206, 'f1': 0.6256410256410256}, 'slot': {'p': 0.42857142857142855, 'r': 0.0027100271002710027, 'f1': 0.005385996409335727}, 'combined': 0.0033697003176356858, 'epoch': 2} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.3, 'f1': 0.4615384615384615}, 'slot': {'p': 0.5, 'r': 0.005671077504725898, 'f1': 0.011214953271028037}, 'combined': 0.005176132278936017, 'epoch': 2} Test for Russian: {'template': {'p': 0.9538461538461539, 'r': 0.4732824427480916, 'f1': 0.6326530612244898}, 'slot': {'p': 0.42857142857142855, 'r': 0.0027100271002710027, 'f1': 0.005385996409335727}, 'combined': 0.0034074671161103583, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:18:00.043864: step: 4/77, loss: 0.05876729637384415 2023-01-23 22:18:01.332602: step: 8/77, loss: 0.007638957351446152 2023-01-23 22:18:02.629104: step: 12/77, loss: 0.017381764948368073 2023-01-23 22:18:03.901856: step: 16/77, loss: 0.05007513239979744 2023-01-23 22:18:05.200666: step: 20/77, loss: 0.0335371233522892 2023-01-23 22:18:06.496838: step: 24/77, loss: 0.012025022879242897 2023-01-23 22:18:07.785151: step: 28/77, loss: 0.027347933501005173 2023-01-23 22:18:09.083144: step: 32/77, loss: 0.00734763452783227 2023-01-23 22:18:10.404943: step: 36/77, loss: 0.04710661992430687 2023-01-23 22:18:11.680682: step: 40/77, loss: 0.025218769907951355 2023-01-23 22:18:12.961195: step: 44/77, loss: 0.04439151659607887 2023-01-23 22:18:14.313014: step: 48/77, loss: 0.010919013060629368 2023-01-23 22:18:15.615753: step: 52/77, loss: 0.12057239562273026 2023-01-23 22:18:16.880288: step: 56/77, loss: 0.022180480882525444 2023-01-23 22:18:18.138390: step: 60/77, loss: 0.024242157116532326 2023-01-23 22:18:19.486744: step: 64/77, loss: 0.03236313909292221 2023-01-23 22:18:20.795727: step: 68/77, loss: 0.07304063439369202 2023-01-23 22:18:22.067457: step: 72/77, loss: 0.00362603017129004 2023-01-23 22:18:23.393560: step: 76/77, loss: 0.08075417578220367 2023-01-23 22:18:24.672965: step: 80/77, loss: 0.014852027408778667 2023-01-23 22:18:25.960962: step: 84/77, loss: 
0.003454985562711954 2023-01-23 22:18:27.267170: step: 88/77, loss: 0.0198269821703434 2023-01-23 22:18:28.594844: step: 92/77, loss: 0.019170943647623062 2023-01-23 22:18:29.842994: step: 96/77, loss: 0.02982323244214058 2023-01-23 22:18:31.139347: step: 100/77, loss: 0.026217911392450333 2023-01-23 22:18:32.404181: step: 104/77, loss: 0.04693746566772461 2023-01-23 22:18:33.657708: step: 108/77, loss: 0.025034580379724503 2023-01-23 22:18:34.932417: step: 112/77, loss: 0.041843514889478683 2023-01-23 22:18:36.183590: step: 116/77, loss: 0.04162782058119774 2023-01-23 22:18:37.488223: step: 120/77, loss: 0.023862779140472412 2023-01-23 22:18:38.782768: step: 124/77, loss: 0.016180217266082764 2023-01-23 22:18:40.066921: step: 128/77, loss: 0.014978856779634953 2023-01-23 22:18:41.381514: step: 132/77, loss: 0.012652370147407055 2023-01-23 22:18:42.743453: step: 136/77, loss: 0.04716186225414276 2023-01-23 22:18:43.974219: step: 140/77, loss: 0.008099338971078396 2023-01-23 22:18:45.250712: step: 144/77, loss: 0.006883854046463966 2023-01-23 22:18:46.539165: step: 148/77, loss: 0.08612009137868881 2023-01-23 22:18:47.815885: step: 152/77, loss: 0.004275473766028881 2023-01-23 22:18:49.093006: step: 156/77, loss: 0.006404031068086624 2023-01-23 22:18:50.408499: step: 160/77, loss: 0.06812773644924164 2023-01-23 22:18:51.690569: step: 164/77, loss: 0.038200266659259796 2023-01-23 22:18:52.939800: step: 168/77, loss: 0.004217622336000204 2023-01-23 22:18:54.244120: step: 172/77, loss: 0.03938934579491615 2023-01-23 22:18:55.533734: step: 176/77, loss: 0.02855922468006611 2023-01-23 22:18:56.838391: step: 180/77, loss: 0.029067791998386383 2023-01-23 22:18:58.129425: step: 184/77, loss: 0.016929026693105698 2023-01-23 22:18:59.411373: step: 188/77, loss: 0.18994294106960297 2023-01-23 22:19:00.717470: step: 192/77, loss: 0.10848405212163925 2023-01-23 22:19:01.963109: step: 196/77, loss: 0.03094695694744587 2023-01-23 22:19:03.268905: step: 200/77, loss: 0.017419282346963882 2023-01-23 22:19:04.562737: step: 204/77, loss: 0.09706324338912964 2023-01-23 22:19:05.855468: step: 208/77, loss: 0.019700979813933372 2023-01-23 22:19:07.170192: step: 212/77, loss: 0.010418681427836418 2023-01-23 22:19:08.458645: step: 216/77, loss: 0.015615657903254032 2023-01-23 22:19:09.765796: step: 220/77, loss: 0.01729726418852806 2023-01-23 22:19:11.070788: step: 224/77, loss: 0.028866499662399292 2023-01-23 22:19:12.332450: step: 228/77, loss: 0.03619959205389023 2023-01-23 22:19:13.635277: step: 232/77, loss: 0.047028932720422745 2023-01-23 22:19:14.927154: step: 236/77, loss: 0.024705318734049797 2023-01-23 22:19:16.236504: step: 240/77, loss: 0.0075832996517419815 2023-01-23 22:19:17.507265: step: 244/77, loss: 0.024004830047488213 2023-01-23 22:19:18.772067: step: 248/77, loss: 0.033208757638931274 2023-01-23 22:19:20.083653: step: 252/77, loss: 0.03347267583012581 2023-01-23 22:19:21.391459: step: 256/77, loss: 0.015569946728646755 2023-01-23 22:19:22.705488: step: 260/77, loss: 0.22618597745895386 2023-01-23 22:19:23.981223: step: 264/77, loss: 0.04095214605331421 2023-01-23 22:19:25.279929: step: 268/77, loss: 0.01501399651169777 2023-01-23 22:19:26.588676: step: 272/77, loss: 0.06192564591765404 2023-01-23 22:19:27.877988: step: 276/77, loss: 0.01665923185646534 2023-01-23 22:19:29.189130: step: 280/77, loss: 0.04294333606958389 2023-01-23 22:19:30.430779: step: 284/77, loss: 0.024771392345428467 2023-01-23 22:19:31.697382: step: 288/77, loss: 0.07365136593580246 2023-01-23 22:19:32.959132: step: 
292/77, loss: 0.014921758323907852 2023-01-23 22:19:34.294294: step: 296/77, loss: 0.06613760441541672 2023-01-23 22:19:35.615538: step: 300/77, loss: 0.026917394250631332 2023-01-23 22:19:36.923601: step: 304/77, loss: 0.010461905039846897 2023-01-23 22:19:38.198579: step: 308/77, loss: 0.019604841247200966 2023-01-23 22:19:39.510996: step: 312/77, loss: 0.008131084032356739 2023-01-23 22:19:40.771684: step: 316/77, loss: 0.018199313431978226 2023-01-23 22:19:42.103788: step: 320/77, loss: 0.024113517254590988 2023-01-23 22:19:43.429849: step: 324/77, loss: 0.007118526380509138 2023-01-23 22:19:44.765203: step: 328/77, loss: 0.046088460832834244 2023-01-23 22:19:46.062182: step: 332/77, loss: 0.008069220930337906 2023-01-23 22:19:47.359309: step: 336/77, loss: 0.010732055641710758 2023-01-23 22:19:48.678366: step: 340/77, loss: 0.01751190796494484 2023-01-23 22:19:49.959418: step: 344/77, loss: 0.0584302619099617 2023-01-23 22:19:51.191054: step: 348/77, loss: 0.01757621206343174 2023-01-23 22:19:52.502854: step: 352/77, loss: 0.03035123646259308 2023-01-23 22:19:53.801474: step: 356/77, loss: 0.06061801314353943 2023-01-23 22:19:55.110687: step: 360/77, loss: 0.052744798362255096 2023-01-23 22:19:56.393328: step: 364/77, loss: 0.04332219436764717 2023-01-23 22:19:57.734039: step: 368/77, loss: 0.06133495271205902 2023-01-23 22:19:58.997912: step: 372/77, loss: 0.014132341369986534 2023-01-23 22:20:00.290035: step: 376/77, loss: 0.018833355978131294 2023-01-23 22:20:01.565283: step: 380/77, loss: 0.01984577253460884 2023-01-23 22:20:02.876254: step: 384/77, loss: 0.013279399834573269 2023-01-23 22:20:04.189985: step: 388/77, loss: 0.06788233667612076 ================================================== Loss: 0.035 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5038167938931297, 'f1': 0.6534653465346535}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.017121362791300963, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 
'epoch': 3} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5038167938931297, 'f1': 0.6534653465346535}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.017121362791300963, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 4 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:22:07.300691: step: 4/77, loss: 0.017973169684410095 2023-01-23 22:22:08.616675: step: 8/77, loss: 0.032591622322797775 2023-01-23 22:22:09.884428: step: 12/77, loss: 0.027060119435191154 2023-01-23 22:22:11.178873: step: 16/77, loss: 0.009688153862953186 2023-01-23 22:22:12.497218: step: 20/77, loss: 0.05535442754626274 2023-01-23 22:22:13.755884: step: 24/77, loss: 0.016799340024590492 2023-01-23 22:22:15.037778: step: 28/77, loss: 0.028497029095888138 2023-01-23 22:22:16.320568: step: 32/77, loss: 0.016341451555490494 2023-01-23 22:22:17.601554: step: 36/77, loss: 0.018108392134308815 2023-01-23 22:22:18.833632: step: 40/77, loss: 0.0141389025375247 2023-01-23 22:22:20.168097: step: 44/77, loss: 0.0389748252928257 2023-01-23 22:22:21.471481: step: 48/77, loss: 0.059972554445266724 2023-01-23 22:22:22.746914: step: 52/77, loss: 0.08551490306854248 2023-01-23 22:22:23.994280: step: 56/77, loss: 0.01003539189696312 2023-01-23 22:22:25.282255: step: 60/77, loss: 0.08415193110704422 2023-01-23 22:22:26.537715: step: 64/77, loss: 0.05124989524483681 2023-01-23 22:22:27.791219: step: 68/77, loss: 0.028651192784309387 2023-01-23 22:22:29.094489: step: 72/77, loss: 0.06674402952194214 2023-01-23 22:22:30.379465: step: 76/77, loss: 
0.0061377864331007 2023-01-23 22:22:31.694830: step: 80/77, loss: 0.0033220085315406322 2023-01-23 22:22:33.031936: step: 84/77, loss: 0.0645206943154335 2023-01-23 22:22:34.332425: step: 88/77, loss: 0.007097979541867971 2023-01-23 22:22:35.637677: step: 92/77, loss: 0.07385362684726715 2023-01-23 22:22:36.923231: step: 96/77, loss: 0.024214647710323334 2023-01-23 22:22:38.235951: step: 100/77, loss: 0.03102937713265419 2023-01-23 22:22:39.578474: step: 104/77, loss: 0.0496261827647686 2023-01-23 22:22:40.903733: step: 108/77, loss: 0.022620979696512222 2023-01-23 22:22:42.175144: step: 112/77, loss: 0.05239538848400116 2023-01-23 22:22:43.452534: step: 116/77, loss: 0.032066408544778824 2023-01-23 22:22:44.756011: step: 120/77, loss: 0.0036533609963953495 2023-01-23 22:22:46.068629: step: 124/77, loss: 0.013895252719521523 2023-01-23 22:22:47.306137: step: 128/77, loss: 0.043253764510154724 2023-01-23 22:22:48.592943: step: 132/77, loss: 0.0052756816148757935 2023-01-23 22:22:49.903492: step: 136/77, loss: 0.04925745353102684 2023-01-23 22:22:51.170346: step: 140/77, loss: 0.032040733844041824 2023-01-23 22:22:52.459534: step: 144/77, loss: 0.023572130128741264 2023-01-23 22:22:53.734662: step: 148/77, loss: 0.024410491809248924 2023-01-23 22:22:55.032663: step: 152/77, loss: 0.009820953011512756 2023-01-23 22:22:56.288861: step: 156/77, loss: 0.045823048800230026 2023-01-23 22:22:57.573004: step: 160/77, loss: 0.005057765636593103 2023-01-23 22:22:58.898113: step: 164/77, loss: 0.03875165060162544 2023-01-23 22:23:00.235733: step: 168/77, loss: 0.025209451094269753 2023-01-23 22:23:01.557427: step: 172/77, loss: 0.02618713490664959 2023-01-23 22:23:02.892713: step: 176/77, loss: 0.028252843767404556 2023-01-23 22:23:04.174106: step: 180/77, loss: 0.004084436688572168 2023-01-23 22:23:05.455209: step: 184/77, loss: 0.019054951146245003 2023-01-23 22:23:06.761121: step: 188/77, loss: 0.012868019752204418 2023-01-23 22:23:08.105110: step: 192/77, loss: 0.06383177638053894 2023-01-23 22:23:09.448616: step: 196/77, loss: 0.016221703961491585 2023-01-23 22:23:10.748472: step: 200/77, loss: 0.021557895466685295 2023-01-23 22:23:12.048494: step: 204/77, loss: 0.00647857878357172 2023-01-23 22:23:13.327014: step: 208/77, loss: 0.021689537912607193 2023-01-23 22:23:14.624480: step: 212/77, loss: 0.01842329651117325 2023-01-23 22:23:15.930715: step: 216/77, loss: 0.02668784372508526 2023-01-23 22:23:17.232680: step: 220/77, loss: 0.01759856380522251 2023-01-23 22:23:18.543854: step: 224/77, loss: 0.007021921221166849 2023-01-23 22:23:19.880226: step: 228/77, loss: 0.029395049437880516 2023-01-23 22:23:21.185957: step: 232/77, loss: 0.012759885750710964 2023-01-23 22:23:22.495234: step: 236/77, loss: 0.00871328730136156 2023-01-23 22:23:23.825696: step: 240/77, loss: 0.02408706396818161 2023-01-23 22:23:25.112719: step: 244/77, loss: 0.046969473361968994 2023-01-23 22:23:26.429441: step: 248/77, loss: 0.060625314712524414 2023-01-23 22:23:27.703738: step: 252/77, loss: 0.058289043605327606 2023-01-23 22:23:28.996637: step: 256/77, loss: 0.0016667278250679374 2023-01-23 22:23:30.287510: step: 260/77, loss: 0.028811514377593994 2023-01-23 22:23:31.589817: step: 264/77, loss: 0.0719747543334961 2023-01-23 22:23:32.948189: step: 268/77, loss: 0.020027348771691322 2023-01-23 22:23:34.219064: step: 272/77, loss: 0.07541616261005402 2023-01-23 22:23:35.528701: step: 276/77, loss: 0.023574626073241234 2023-01-23 22:23:36.803172: step: 280/77, loss: 0.04856455698609352 2023-01-23 22:23:38.084468: step: 
284/77, loss: 0.09293892234563828 2023-01-23 22:23:39.355159: step: 288/77, loss: 0.11887235194444656 2023-01-23 22:23:40.628117: step: 292/77, loss: 0.00804845243692398 2023-01-23 22:23:41.882653: step: 296/77, loss: 0.05419022589921951 2023-01-23 22:23:43.165050: step: 300/77, loss: 0.017192896455526352 2023-01-23 22:23:44.459796: step: 304/77, loss: 0.010086746886372566 2023-01-23 22:23:45.784034: step: 308/77, loss: 0.08471477776765823 2023-01-23 22:23:47.073253: step: 312/77, loss: 0.07080481946468353 2023-01-23 22:23:48.342308: step: 316/77, loss: 0.018073439598083496 2023-01-23 22:23:49.643213: step: 320/77, loss: 0.11502989381551743 2023-01-23 22:23:50.957033: step: 324/77, loss: 0.004280322231352329 2023-01-23 22:23:52.305196: step: 328/77, loss: 0.015781881287693977 2023-01-23 22:23:53.568481: step: 332/77, loss: 0.13091981410980225 2023-01-23 22:23:54.844912: step: 336/77, loss: 0.05560621619224548 2023-01-23 22:23:56.112957: step: 340/77, loss: 0.030395524576306343 2023-01-23 22:23:57.450033: step: 344/77, loss: 0.014790229499340057 2023-01-23 22:23:58.761335: step: 348/77, loss: 0.003662054194137454 2023-01-23 22:24:00.072546: step: 352/77, loss: 0.04369152709841728 2023-01-23 22:24:01.375306: step: 356/77, loss: 0.1887076199054718 2023-01-23 22:24:02.657720: step: 360/77, loss: 0.054661963135004044 2023-01-23 22:24:03.927203: step: 364/77, loss: 0.022252492606639862 2023-01-23 22:24:05.236181: step: 368/77, loss: 0.09166667610406876 2023-01-23 22:24:06.542303: step: 372/77, loss: 0.020127348601818085 2023-01-23 22:24:07.805796: step: 376/77, loss: 0.00931414496153593 2023-01-23 22:24:09.091877: step: 380/77, loss: 0.03850318491458893 2023-01-23 22:24:10.407242: step: 384/77, loss: 0.042095109820365906 2023-01-23 22:24:11.703549: step: 388/77, loss: 0.016215737909078598 ================================================== Loss: 0.036 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Chinese: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Korean: {'template': {'p': 0.926829268292683, 'r': 0.5801526717557252, 'f1': 0.7136150234741785}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012608039283996087, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 
'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 5 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:25:59.446142: step: 4/77, loss: 0.014158310368657112 2023-01-23 22:26:00.724952: step: 8/77, loss: 0.04366450011730194 2023-01-23 22:26:02.006284: step: 12/77, loss: 0.002636625897139311 2023-01-23 22:26:03.306424: step: 16/77, loss: 0.022517450153827667 2023-01-23 22:26:04.548515: step: 20/77, loss: 0.020733755081892014 2023-01-23 22:26:05.877467: step: 24/77, loss: 0.1309209167957306 2023-01-23 22:26:07.185937: step: 28/77, loss: 0.005739598069339991 2023-01-23 22:26:08.475173: step: 32/77, loss: 0.002989714965224266 2023-01-23 22:26:09.784848: step: 36/77, loss: 0.046741776168346405 2023-01-23 22:26:11.075020: step: 40/77, loss: 0.1020488366484642 2023-01-23 22:26:12.317015: step: 44/77, loss: 0.013086818158626556 2023-01-23 22:26:13.613589: step: 48/77, loss: 0.014181993901729584 2023-01-23 22:26:14.902010: step: 52/77, loss: 0.04863457381725311 2023-01-23 22:26:16.215757: step: 56/77, loss: 0.004329562187194824 2023-01-23 22:26:17.495965: step: 60/77, loss: 0.04907675087451935 2023-01-23 22:26:18.776895: step: 64/77, loss: 0.0503888800740242 2023-01-23 22:26:20.087633: step: 68/77, loss: 
0.02209595963358879 2023-01-23 22:26:21.354591: step: 72/77, loss: 0.021710550412535667 2023-01-23 22:26:22.655995: step: 76/77, loss: 0.013041822239756584 2023-01-23 22:26:23.962167: step: 80/77, loss: 0.004665164276957512 2023-01-23 22:26:25.213383: step: 84/77, loss: 0.03621523827314377 2023-01-23 22:26:26.514148: step: 88/77, loss: 0.0018241109792143106 2023-01-23 22:26:27.784297: step: 92/77, loss: 0.06880193203687668 2023-01-23 22:26:29.116456: step: 96/77, loss: 0.018401481211185455 2023-01-23 22:26:30.462045: step: 100/77, loss: 0.02690611407160759 2023-01-23 22:26:31.784294: step: 104/77, loss: 0.039673320949077606 2023-01-23 22:26:33.099323: step: 108/77, loss: 0.014530510641634464 2023-01-23 22:26:34.418749: step: 112/77, loss: 0.042198315262794495 2023-01-23 22:26:35.729040: step: 116/77, loss: 0.011293873190879822 2023-01-23 22:26:37.078030: step: 120/77, loss: 0.004281324800103903 2023-01-23 22:26:38.418864: step: 124/77, loss: 0.006314275786280632 2023-01-23 22:26:39.738763: step: 128/77, loss: 0.004242511931806803 2023-01-23 22:26:41.040519: step: 132/77, loss: 0.0036362670361995697 2023-01-23 22:26:42.282921: step: 136/77, loss: 0.053869958966970444 2023-01-23 22:26:43.543338: step: 140/77, loss: 0.033010292798280716 2023-01-23 22:26:44.902705: step: 144/77, loss: 0.028418347239494324 2023-01-23 22:26:46.214706: step: 148/77, loss: 0.03635575622320175 2023-01-23 22:26:47.553434: step: 152/77, loss: 0.00185483624227345 2023-01-23 22:26:48.869994: step: 156/77, loss: 0.025224221870303154 2023-01-23 22:26:50.208750: step: 160/77, loss: 0.02771889604628086 2023-01-23 22:26:51.505196: step: 164/77, loss: 0.030288076028227806 2023-01-23 22:26:52.842061: step: 168/77, loss: 0.0323544517159462 2023-01-23 22:26:54.151182: step: 172/77, loss: 0.019649991765618324 2023-01-23 22:26:55.470613: step: 176/77, loss: 0.09695424884557724 2023-01-23 22:26:56.757570: step: 180/77, loss: 0.03970777988433838 2023-01-23 22:26:58.033457: step: 184/77, loss: 0.008301706984639168 2023-01-23 22:26:59.260210: step: 188/77, loss: 0.003423915943130851 2023-01-23 22:27:00.581220: step: 192/77, loss: 0.022232314571738243 2023-01-23 22:27:01.860075: step: 196/77, loss: 0.004373743664473295 2023-01-23 22:27:03.161329: step: 200/77, loss: 0.13403457403182983 2023-01-23 22:27:04.436683: step: 204/77, loss: 0.0042198095470666885 2023-01-23 22:27:05.768545: step: 208/77, loss: 0.0686882734298706 2023-01-23 22:27:07.083427: step: 212/77, loss: 0.000655624084174633 2023-01-23 22:27:08.339999: step: 216/77, loss: 0.04420031979680061 2023-01-23 22:27:09.670221: step: 220/77, loss: 0.01031609158962965 2023-01-23 22:27:10.993532: step: 224/77, loss: 0.002405523555353284 2023-01-23 22:27:12.337355: step: 228/77, loss: 0.02606010064482689 2023-01-23 22:27:13.605903: step: 232/77, loss: 0.016452355310320854 2023-01-23 22:27:14.889818: step: 236/77, loss: 0.0966537743806839 2023-01-23 22:27:16.180517: step: 240/77, loss: 0.015614238567650318 2023-01-23 22:27:17.427305: step: 244/77, loss: 0.017007920891046524 2023-01-23 22:27:18.744661: step: 248/77, loss: 0.03029618225991726 2023-01-23 22:27:20.080388: step: 252/77, loss: 0.013531284406781197 2023-01-23 22:27:21.405490: step: 256/77, loss: 0.01680459827184677 2023-01-23 22:27:22.741328: step: 260/77, loss: 0.024821948260068893 2023-01-23 22:27:24.067784: step: 264/77, loss: 0.03455809876322746 2023-01-23 22:27:25.318672: step: 268/77, loss: 0.04500247538089752 2023-01-23 22:27:26.650484: step: 272/77, loss: 0.10835660248994827 2023-01-23 22:27:27.984731: step: 276/77, 
loss: 0.006907912902534008 2023-01-23 22:27:29.288741: step: 280/77, loss: 0.07391379773616791 2023-01-23 22:27:30.572679: step: 284/77, loss: 0.018085170537233353 2023-01-23 22:27:31.891235: step: 288/77, loss: 0.007498640567064285 2023-01-23 22:27:33.195823: step: 292/77, loss: 0.001336018554866314 2023-01-23 22:27:34.513265: step: 296/77, loss: 0.0006786617450416088 2023-01-23 22:27:35.806392: step: 300/77, loss: 0.010314485989511013 2023-01-23 22:27:37.144253: step: 304/77, loss: 0.02813785895705223 2023-01-23 22:27:38.471860: step: 308/77, loss: 0.013725925236940384 2023-01-23 22:27:39.766187: step: 312/77, loss: 0.005271845497190952 2023-01-23 22:27:41.043770: step: 316/77, loss: 0.02690902352333069 2023-01-23 22:27:42.312936: step: 320/77, loss: 0.04803672432899475 2023-01-23 22:27:43.641419: step: 324/77, loss: 0.05544379726052284 2023-01-23 22:27:45.011716: step: 328/77, loss: 0.021013982594013214 2023-01-23 22:27:46.305426: step: 332/77, loss: 0.06210104376077652 2023-01-23 22:27:47.622722: step: 336/77, loss: 0.010880689136683941 2023-01-23 22:27:48.937259: step: 340/77, loss: 0.017330151051282883 2023-01-23 22:27:50.278778: step: 344/77, loss: 0.1314152628183365 2023-01-23 22:27:51.517300: step: 348/77, loss: 0.006899089552462101 2023-01-23 22:27:52.815147: step: 352/77, loss: 0.019113019108772278 2023-01-23 22:27:54.145973: step: 356/77, loss: 0.0011101725976914167 2023-01-23 22:27:55.404729: step: 360/77, loss: 0.01264708861708641 2023-01-23 22:27:56.668112: step: 364/77, loss: 0.04578924551606178 2023-01-23 22:27:57.967875: step: 368/77, loss: 0.0057904161512851715 2023-01-23 22:27:59.254579: step: 372/77, loss: 0.004603937268257141 2023-01-23 22:28:00.509671: step: 376/77, loss: 0.02703516185283661 2023-01-23 22:28:01.795397: step: 380/77, loss: 0.009556187316775322 2023-01-23 22:28:03.084482: step: 384/77, loss: 0.017396705225110054 2023-01-23 22:28:04.413757: step: 388/77, loss: 0.0022095218300819397 ================================================== Loss: 0.029 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Chinese: {'template': {'p': 0.9466666666666667, 'r': 0.5419847328244275, 'f1': 0.6893203883495146}, 'slot': {'p': 0.5, 'r': 0.00993676603432701, 'f1': 0.01948627103631532}, 'combined': 0.013432283918236776, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Korean: {'template': {'p': 0.9342105263157895, 'r': 0.5419847328244275, 'f1': 0.6859903381642513}, 'slot': {'p': 0.4782608695652174, 'r': 0.00993676603432701, 'f1': 0.019469026548672566}, 'combined': 0.01335556410585268, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Russian: {'template': {'p': 0.9466666666666667, 'r': 0.5419847328244275, 'f1': 0.6893203883495146}, 'slot': {'p': 0.4782608695652174, 'r': 0.00993676603432701, 'f1': 0.019469026548672566}, 'combined': 0.013420396941317983, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} 
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 6 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:29:46.219509: step: 4/77, loss: 0.0183593537658453 2023-01-23 22:29:47.478536: step: 8/77, loss: 0.005260112229734659 2023-01-23 22:29:48.729738: step: 12/77, loss: 0.0007216347148641944 2023-01-23 22:29:50.010070: step: 16/77, loss: 0.051519881933927536 2023-01-23 22:29:51.206442: step: 20/77, loss: 0.002809650730341673 2023-01-23 22:29:52.477969: step: 24/77, loss: 0.06290657073259354 2023-01-23 22:29:53.748690: step: 28/77, loss: 0.028903154656291008 2023-01-23 22:29:55.048668: step: 32/77, loss: 0.0008781985379755497 2023-01-23 22:29:56.337366: step: 36/77, loss: 0.01905008964240551 2023-01-23 22:29:57.657997: step: 40/77, loss: 0.015037070028483868 2023-01-23 22:29:58.949559: step: 44/77, loss: 0.026022832840681076 2023-01-23 22:30:00.201173: step: 48/77, loss: 0.002238120650872588 2023-01-23 22:30:01.550547: step: 52/77, loss: 0.01229383796453476 2023-01-23 22:30:02.861317: step: 56/77, loss: 0.03464057669043541 2023-01-23 22:30:04.097386: step: 
60/77, loss: 0.0020629707723855972 2023-01-23 22:30:05.394785: step: 64/77, loss: 0.02100500650703907 2023-01-23 22:30:06.672774: step: 68/77, loss: 0.09934692829847336 2023-01-23 22:30:07.975387: step: 72/77, loss: 0.022709909826517105 2023-01-23 22:30:09.294268: step: 76/77, loss: 0.007556264288723469 2023-01-23 22:30:10.571639: step: 80/77, loss: 0.002152753295376897 2023-01-23 22:30:11.904092: step: 84/77, loss: 0.017157964408397675 2023-01-23 22:30:13.182698: step: 88/77, loss: 0.021427417173981667 2023-01-23 22:30:14.536816: step: 92/77, loss: 0.000519811874255538 2023-01-23 22:30:15.830797: step: 96/77, loss: 0.04389156028628349 2023-01-23 22:30:17.140608: step: 100/77, loss: 0.030964136123657227 2023-01-23 22:30:18.468611: step: 104/77, loss: 0.03548784181475639 2023-01-23 22:30:19.806304: step: 108/77, loss: 0.04955758899450302 2023-01-23 22:30:21.117558: step: 112/77, loss: 0.01200494822114706 2023-01-23 22:30:22.435901: step: 116/77, loss: 0.09407459199428558 2023-01-23 22:30:23.717778: step: 120/77, loss: 0.0007696656975895166 2023-01-23 22:30:24.993260: step: 124/77, loss: 0.0031200533267110586 2023-01-23 22:30:26.287310: step: 128/77, loss: 0.01818021386861801 2023-01-23 22:30:27.621585: step: 132/77, loss: 0.010161369107663631 2023-01-23 22:30:28.926015: step: 136/77, loss: 0.02967633120715618 2023-01-23 22:30:30.230063: step: 140/77, loss: 0.037315886467695236 2023-01-23 22:30:31.532018: step: 144/77, loss: 0.02900487184524536 2023-01-23 22:30:32.843144: step: 148/77, loss: 0.0012069009244441986 2023-01-23 22:30:34.105466: step: 152/77, loss: 0.0053014555014669895 2023-01-23 22:30:35.464327: step: 156/77, loss: 0.009256775490939617 2023-01-23 22:30:36.707786: step: 160/77, loss: 0.03802155330777168 2023-01-23 22:30:38.017927: step: 164/77, loss: 0.022010542452335358 2023-01-23 22:30:39.335565: step: 168/77, loss: 0.036441680043935776 2023-01-23 22:30:40.611987: step: 172/77, loss: 0.07211640477180481 2023-01-23 22:30:41.895588: step: 176/77, loss: 0.00906977616250515 2023-01-23 22:30:43.207413: step: 180/77, loss: 0.07605388760566711 2023-01-23 22:30:44.514888: step: 184/77, loss: 0.04105954244732857 2023-01-23 22:30:45.822616: step: 188/77, loss: 0.029035797342658043 2023-01-23 22:30:47.135015: step: 192/77, loss: 0.02307794988155365 2023-01-23 22:30:48.472663: step: 196/77, loss: 0.020407551899552345 2023-01-23 22:30:49.729819: step: 200/77, loss: 0.009717256762087345 2023-01-23 22:30:51.021523: step: 204/77, loss: 0.025658661499619484 2023-01-23 22:30:52.349082: step: 208/77, loss: 0.012879461981356144 2023-01-23 22:30:53.610951: step: 212/77, loss: 0.01909041218459606 2023-01-23 22:30:54.926093: step: 216/77, loss: 0.029446642845869064 2023-01-23 22:30:56.189601: step: 220/77, loss: 0.07164009660482407 2023-01-23 22:30:57.525870: step: 224/77, loss: 0.019298046827316284 2023-01-23 22:30:58.813453: step: 228/77, loss: 0.20703347027301788 2023-01-23 22:31:00.071519: step: 232/77, loss: 0.008604494854807854 2023-01-23 22:31:01.378198: step: 236/77, loss: 0.0033027713652700186 2023-01-23 22:31:02.629497: step: 240/77, loss: 0.028479279950261116 2023-01-23 22:31:03.931698: step: 244/77, loss: 0.020767278969287872 2023-01-23 22:31:05.223096: step: 248/77, loss: 0.0023850565776228905 2023-01-23 22:31:06.519836: step: 252/77, loss: 0.03148641437292099 2023-01-23 22:31:07.810870: step: 256/77, loss: 0.01915784925222397 2023-01-23 22:31:09.043243: step: 260/77, loss: 0.007667901925742626 2023-01-23 22:31:10.344121: step: 264/77, loss: 0.022026922553777695 2023-01-23 
22:31:11.604175: step: 268/77, loss: 0.01379552111029625 2023-01-23 22:31:12.905436: step: 272/77, loss: 0.009896479547023773 2023-01-23 22:31:14.184075: step: 276/77, loss: 0.03870394080877304 2023-01-23 22:31:15.514023: step: 280/77, loss: 0.09177344292402267 2023-01-23 22:31:16.808109: step: 284/77, loss: 0.02456502430140972 2023-01-23 22:31:18.162109: step: 288/77, loss: 0.002290900330990553 2023-01-23 22:31:19.465010: step: 292/77, loss: 0.0007645561126992106 2023-01-23 22:31:20.746474: step: 296/77, loss: 0.003591812215745449 2023-01-23 22:31:22.087753: step: 300/77, loss: 0.02337750233709812 2023-01-23 22:31:23.342279: step: 304/77, loss: 0.00023124905419535935 2023-01-23 22:31:24.628814: step: 308/77, loss: 0.017381660640239716 2023-01-23 22:31:25.965668: step: 312/77, loss: 0.024244729429483414 2023-01-23 22:31:27.271571: step: 316/77, loss: 0.0018833799986168742 2023-01-23 22:31:28.586875: step: 320/77, loss: 0.004297305829823017 2023-01-23 22:31:29.870011: step: 324/77, loss: 0.00848829373717308 2023-01-23 22:31:31.152754: step: 328/77, loss: 0.011697773821651936 2023-01-23 22:31:32.472974: step: 332/77, loss: 0.028859535232186317 2023-01-23 22:31:33.779958: step: 336/77, loss: 0.0342305451631546 2023-01-23 22:31:35.087819: step: 340/77, loss: 0.009530341252684593 2023-01-23 22:31:36.374686: step: 344/77, loss: 0.05515669286251068 2023-01-23 22:31:37.640740: step: 348/77, loss: 0.05997423827648163 2023-01-23 22:31:38.965504: step: 352/77, loss: 0.16188277304172516 2023-01-23 22:31:40.269472: step: 356/77, loss: 0.03290410712361336 2023-01-23 22:31:41.542730: step: 360/77, loss: 0.01108237449079752 2023-01-23 22:31:42.851824: step: 364/77, loss: 0.011187870055437088 2023-01-23 22:31:44.163753: step: 368/77, loss: 0.0055564031936228275 2023-01-23 22:31:45.435500: step: 372/77, loss: 0.0013651493936777115 2023-01-23 22:31:46.771557: step: 376/77, loss: 0.01341039314866066 2023-01-23 22:31:48.126353: step: 380/77, loss: 0.009387511759996414 2023-01-23 22:31:49.491911: step: 384/77, loss: 0.020141761749982834 2023-01-23 22:31:50.770391: step: 388/77, loss: 0.053573716431856155 ================================================== Loss: 0.027 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04890349201497669, 'epoch': 6} Test Chinese: {'template': {'p': 0.971830985915493, 'r': 0.5267175572519084, 'f1': 0.6831683168316832}, 'slot': {'p': 0.4642857142857143, 'r': 0.011743450767841012, 'f1': 0.022907488986784144}, 'combined': 0.015649670693941645, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04890349201497669, 'epoch': 6} Test Korean: {'template': {'p': 0.971830985915493, 'r': 0.5267175572519084, 'f1': 0.6831683168316832}, 'slot': {'p': 0.4642857142857143, 'r': 0.011743450767841012, 'f1': 0.022907488986784144}, 'combined': 0.015649670693941645, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04988944951527864, 'epoch': 6} Test Russian: {'template': {'p': 0.971830985915493, 'r': 0.5267175572519084, 'f1': 0.6831683168316832}, 'slot': {'p': 0.4642857142857143, 'r': 0.011743450767841012, 'f1': 0.022907488986784144}, 'combined': 0.015649670693941645, 'epoch': 6} Sample Chinese: 
{'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 6} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 7 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:33:32.519696: step: 4/77, loss: 0.0011636499548330903 2023-01-23 22:33:33.787349: step: 8/77, loss: 0.0012626384850591421 2023-01-23 22:33:35.078106: step: 12/77, loss: 0.017299551516771317 2023-01-23 22:33:36.347944: step: 16/77, loss: 0.0068957023322582245 2023-01-23 22:33:37.685144: step: 20/77, loss: 0.056037407368421555 2023-01-23 22:33:38.969440: step: 24/77, loss: 0.006196138449013233 2023-01-23 22:33:40.261492: step: 28/77, loss: 0.014314234256744385 2023-01-23 22:33:41.575905: step: 32/77, loss: 0.004170101601630449 2023-01-23 22:33:42.893531: step: 36/77, loss: 0.008770820684731007 2023-01-23 22:33:44.193721: step: 40/77, loss: 0.02249545231461525 2023-01-23 22:33:45.459239: step: 44/77, loss: 0.012950205244123936 2023-01-23 22:33:46.787467: step: 48/77, loss: 
0.04626951366662979 2023-01-23 22:33:48.094196: step: 52/77, loss: 0.006529450882226229 2023-01-23 22:33:49.409451: step: 56/77, loss: 0.03173508495092392 2023-01-23 22:33:50.710566: step: 60/77, loss: 0.004960111808031797 2023-01-23 22:33:51.999046: step: 64/77, loss: 0.024349186569452286 2023-01-23 22:33:53.340361: step: 68/77, loss: 0.031386882066726685 2023-01-23 22:33:54.635576: step: 72/77, loss: 0.013314485549926758 2023-01-23 22:33:55.903767: step: 76/77, loss: 0.002529819030314684 2023-01-23 22:33:57.166290: step: 80/77, loss: 0.00077056820737198 2023-01-23 22:33:58.444435: step: 84/77, loss: 0.01021594274789095 2023-01-23 22:33:59.735751: step: 88/77, loss: 0.02939591556787491 2023-01-23 22:34:01.041225: step: 92/77, loss: 0.0036745904944837093 2023-01-23 22:34:02.364861: step: 96/77, loss: 0.060694627463817596 2023-01-23 22:34:03.642952: step: 100/77, loss: 0.006574501283466816 2023-01-23 22:34:04.924358: step: 104/77, loss: 0.03242919594049454 2023-01-23 22:34:06.231144: step: 108/77, loss: 0.010640475898981094 2023-01-23 22:34:07.571971: step: 112/77, loss: 0.026455843821167946 2023-01-23 22:34:08.876746: step: 116/77, loss: 0.002888244343921542 2023-01-23 22:34:10.176941: step: 120/77, loss: 0.03597759082913399 2023-01-23 22:34:11.442596: step: 124/77, loss: 0.0699692815542221 2023-01-23 22:34:12.712048: step: 128/77, loss: 0.01892452873289585 2023-01-23 22:34:14.013207: step: 132/77, loss: 0.022072017192840576 2023-01-23 22:34:15.291840: step: 136/77, loss: 0.022971363738179207 2023-01-23 22:34:16.572445: step: 140/77, loss: 0.03502960875630379 2023-01-23 22:34:17.883226: step: 144/77, loss: 0.0037489754613488913 2023-01-23 22:34:19.158522: step: 148/77, loss: 0.0136228296905756 2023-01-23 22:34:20.447400: step: 152/77, loss: 0.016560204327106476 2023-01-23 22:34:21.675743: step: 156/77, loss: 0.01532871276140213 2023-01-23 22:34:22.983803: step: 160/77, loss: 0.02267695590853691 2023-01-23 22:34:24.303655: step: 164/77, loss: 0.02335749752819538 2023-01-23 22:34:25.579750: step: 168/77, loss: 0.003120360430330038 2023-01-23 22:34:26.902784: step: 172/77, loss: 0.010223385877907276 2023-01-23 22:34:28.166049: step: 176/77, loss: 0.041943587362766266 2023-01-23 22:34:29.478226: step: 180/77, loss: 0.0003906584461219609 2023-01-23 22:34:30.764381: step: 184/77, loss: 0.02529878355562687 2023-01-23 22:34:32.086717: step: 188/77, loss: 0.009768795222043991 2023-01-23 22:34:33.359206: step: 192/77, loss: 0.004147401079535484 2023-01-23 22:34:34.691805: step: 196/77, loss: 0.022411402314901352 2023-01-23 22:34:36.015515: step: 200/77, loss: 0.003953585401177406 2023-01-23 22:34:37.291084: step: 204/77, loss: 0.02571061998605728 2023-01-23 22:34:38.629821: step: 208/77, loss: 0.026847444474697113 2023-01-23 22:34:39.914651: step: 212/77, loss: 0.0530376061797142 2023-01-23 22:34:41.223258: step: 216/77, loss: 0.034075118601322174 2023-01-23 22:34:42.540401: step: 220/77, loss: 0.12706109881401062 2023-01-23 22:34:43.827330: step: 224/77, loss: 0.04803653433918953 2023-01-23 22:34:45.129172: step: 228/77, loss: 0.0221688412129879 2023-01-23 22:34:46.399893: step: 232/77, loss: 0.007290499284863472 2023-01-23 22:34:47.681639: step: 236/77, loss: 0.017815163359045982 2023-01-23 22:34:48.971886: step: 240/77, loss: 0.0004054011660628021 2023-01-23 22:34:50.234710: step: 244/77, loss: 0.11127576977014542 2023-01-23 22:34:51.513652: step: 248/77, loss: 0.0034016177523881197 2023-01-23 22:34:52.795363: step: 252/77, loss: 0.016864141449332237 2023-01-23 22:34:54.116511: step: 256/77, 
loss: 0.018525857478380203 2023-01-23 22:34:55.425295: step: 260/77, loss: 0.008020728826522827 2023-01-23 22:34:56.746491: step: 264/77, loss: 0.0004376893921289593 2023-01-23 22:34:58.019451: step: 268/77, loss: 0.022905485704541206 2023-01-23 22:34:59.294142: step: 272/77, loss: 0.02104055881500244 2023-01-23 22:35:00.577530: step: 276/77, loss: 0.00035322303301654756 2023-01-23 22:35:01.816032: step: 280/77, loss: 0.02914784848690033 2023-01-23 22:35:03.088067: step: 284/77, loss: 0.016437670215964317 2023-01-23 22:35:04.385855: step: 288/77, loss: 0.0516701266169548 2023-01-23 22:35:05.726435: step: 292/77, loss: 0.01641707308590412 2023-01-23 22:35:07.023080: step: 296/77, loss: 0.04560348019003868 2023-01-23 22:35:08.322055: step: 300/77, loss: 0.027226369827985764 2023-01-23 22:35:09.620785: step: 304/77, loss: 0.016126804053783417 2023-01-23 22:35:10.961343: step: 308/77, loss: 0.009214092046022415 2023-01-23 22:35:12.233065: step: 312/77, loss: 0.019477643072605133 2023-01-23 22:35:13.550215: step: 316/77, loss: 0.0008030800381675363 2023-01-23 22:35:14.812068: step: 320/77, loss: 0.03474152833223343 2023-01-23 22:35:16.128259: step: 324/77, loss: 0.0069403029046952724 2023-01-23 22:35:17.472784: step: 328/77, loss: 0.03761206567287445 2023-01-23 22:35:18.757110: step: 332/77, loss: 0.01154150441288948 2023-01-23 22:35:20.050120: step: 336/77, loss: 0.04174396023154259 2023-01-23 22:35:21.312941: step: 340/77, loss: 0.07245483994483948 2023-01-23 22:35:22.598678: step: 344/77, loss: 0.03308962658047676 2023-01-23 22:35:23.870306: step: 348/77, loss: 0.052312374114990234 2023-01-23 22:35:25.239228: step: 352/77, loss: 0.0009306677966378629 2023-01-23 22:35:26.579476: step: 356/77, loss: 0.02453486993908882 2023-01-23 22:35:27.847529: step: 360/77, loss: 0.01928197778761387 2023-01-23 22:35:29.151223: step: 364/77, loss: 0.048561591655015945 2023-01-23 22:35:30.428881: step: 368/77, loss: 0.02670525759458542 2023-01-23 22:35:31.721895: step: 372/77, loss: 0.002359720878303051 2023-01-23 22:35:33.035955: step: 376/77, loss: 0.015507223084568977 2023-01-23 22:35:34.296248: step: 380/77, loss: 0.019268155097961426 2023-01-23 22:35:35.616832: step: 384/77, loss: 0.0013935527531430125 2023-01-23 22:35:36.911398: step: 388/77, loss: 0.05123730003833771 ================================================== Loss: 0.023 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Chinese: {'template': {'p': 0.8352941176470589, 'r': 0.5419847328244275, 'f1': 0.6574074074074074}, 'slot': {'p': 0.5, 'r': 0.01084010840108401, 'f1': 0.021220159151193636}, 'combined': 0.01395028981235878, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Korean: {'template': {'p': 0.8333333333333334, 'r': 0.5343511450381679, 'f1': 0.6511627906976745}, 'slot': {'p': 0.5, 'r': 0.01084010840108401, 'f1': 0.021220159151193636}, 'combined': 0.013817778051940044, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Russian: {'template': {'p': 0.8255813953488372, 'r': 0.5419847328244275, 'f1': 0.6543778801843319}, 
'slot': {'p': 0.5, 'r': 0.01084010840108401, 'f1': 0.021220159151193636}, 'combined': 0.013886002762532242, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 8 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:37:18.575560: step: 4/77, loss: 0.03130660578608513 2023-01-23 22:37:19.886102: step: 8/77, loss: 0.06778450310230255 2023-01-23 22:37:21.201633: step: 12/77, loss: 0.011556878685951233 2023-01-23 22:37:22.480756: step: 16/77, loss: 0.009970860555768013 2023-01-23 22:37:23.750398: step: 20/77, loss: 0.00028988681151531637 2023-01-23 22:37:25.072451: step: 24/77, loss: 0.021246779710054398 2023-01-23 22:37:26.397612: step: 28/77, loss: 0.04434437304735184 2023-01-23 22:37:27.687991: step: 32/77, loss: 0.06581749022006989 2023-01-23 22:37:29.001466: step: 36/77, loss: 0.014421924948692322 2023-01-23 22:37:30.264043: step: 40/77, loss: 
0.021927639842033386 2023-01-23 22:37:31.507100: step: 44/77, loss: 0.016783470287919044 2023-01-23 22:37:32.777626: step: 48/77, loss: 0.024184659123420715 2023-01-23 22:37:34.095908: step: 52/77, loss: 0.04589436203241348 2023-01-23 22:37:35.394526: step: 56/77, loss: 0.001691319514065981 2023-01-23 22:37:36.720344: step: 60/77, loss: 0.10144390165805817 2023-01-23 22:37:37.955281: step: 64/77, loss: 0.023332679644227028 2023-01-23 22:37:39.229327: step: 68/77, loss: 0.0003049979859497398 2023-01-23 22:37:40.498612: step: 72/77, loss: 0.010500997304916382 2023-01-23 22:37:41.793554: step: 76/77, loss: 0.0020711207762360573 2023-01-23 22:37:43.069449: step: 80/77, loss: 0.03156790882349014 2023-01-23 22:37:44.313202: step: 84/77, loss: 0.010768542066216469 2023-01-23 22:37:45.589195: step: 88/77, loss: 0.006248690187931061 2023-01-23 22:37:46.858061: step: 92/77, loss: 0.006202485412359238 2023-01-23 22:37:48.096147: step: 96/77, loss: 0.002403137506917119 2023-01-23 22:37:49.357815: step: 100/77, loss: 0.05718894302845001 2023-01-23 22:37:50.670830: step: 104/77, loss: 0.0024291344452649355 2023-01-23 22:37:51.988800: step: 108/77, loss: 0.011834661476314068 2023-01-23 22:37:53.292311: step: 112/77, loss: 0.0486491434276104 2023-01-23 22:37:54.537319: step: 116/77, loss: 0.005136487074196339 2023-01-23 22:37:55.802621: step: 120/77, loss: 0.003164472058415413 2023-01-23 22:37:57.074142: step: 124/77, loss: 0.06037474051117897 2023-01-23 22:37:58.365888: step: 128/77, loss: 0.006406131200492382 2023-01-23 22:37:59.695586: step: 132/77, loss: 0.03345470502972603 2023-01-23 22:38:00.961990: step: 136/77, loss: 0.0025869226083159447 2023-01-23 22:38:02.222477: step: 140/77, loss: 0.13212698698043823 2023-01-23 22:38:03.503793: step: 144/77, loss: 0.0013358135474845767 2023-01-23 22:38:04.781741: step: 148/77, loss: 0.030007613822817802 2023-01-23 22:38:06.063393: step: 152/77, loss: 0.10049359500408173 2023-01-23 22:38:07.356470: step: 156/77, loss: 0.03069053217768669 2023-01-23 22:38:08.623337: step: 160/77, loss: 0.017465362325310707 2023-01-23 22:38:09.955394: step: 164/77, loss: 0.006294901482760906 2023-01-23 22:38:11.266212: step: 168/77, loss: 0.0030764213297516108 2023-01-23 22:38:12.578919: step: 172/77, loss: 0.033109307289123535 2023-01-23 22:38:13.906617: step: 176/77, loss: 0.11769656091928482 2023-01-23 22:38:15.225134: step: 180/77, loss: 0.04393400251865387 2023-01-23 22:38:16.563365: step: 184/77, loss: 0.00408775033429265 2023-01-23 22:38:17.885189: step: 188/77, loss: 0.010045966133475304 2023-01-23 22:38:19.245682: step: 192/77, loss: 0.041493598371744156 2023-01-23 22:38:20.584335: step: 196/77, loss: 0.005042455159127712 2023-01-23 22:38:21.916868: step: 200/77, loss: 0.042676590383052826 2023-01-23 22:38:23.193302: step: 204/77, loss: 0.08007384091615677 2023-01-23 22:38:24.491138: step: 208/77, loss: 0.03242093697190285 2023-01-23 22:38:25.771137: step: 212/77, loss: 0.0028606855776160955 2023-01-23 22:38:27.095537: step: 216/77, loss: 0.005781569518148899 2023-01-23 22:38:28.417391: step: 220/77, loss: 0.00999489612877369 2023-01-23 22:38:29.697809: step: 224/77, loss: 0.05668196454644203 2023-01-23 22:38:31.024707: step: 228/77, loss: 0.006618849001824856 2023-01-23 22:38:32.319657: step: 232/77, loss: 0.03185075893998146 2023-01-23 22:38:33.607616: step: 236/77, loss: 0.0239882729947567 2023-01-23 22:38:34.895182: step: 240/77, loss: 0.03320767357945442 2023-01-23 22:38:36.196915: step: 244/77, loss: 0.006629294715821743 2023-01-23 22:38:37.528458: step: 248/77, 
loss: 0.05239451304078102 2023-01-23 22:38:38.802187: step: 252/77, loss: 0.030738556757569313 2023-01-23 22:38:40.101750: step: 256/77, loss: 0.010103265754878521 2023-01-23 22:38:41.341443: step: 260/77, loss: 0.005290025845170021 2023-01-23 22:38:42.610733: step: 264/77, loss: 0.007733101490885019 2023-01-23 22:38:43.911530: step: 268/77, loss: 0.017247524112462997 2023-01-23 22:38:45.221820: step: 272/77, loss: 0.008979875594377518 2023-01-23 22:38:46.497632: step: 276/77, loss: 0.0008188042556867003 2023-01-23 22:38:47.779160: step: 280/77, loss: 0.008541867136955261 2023-01-23 22:38:49.104952: step: 284/77, loss: 0.018784543499350548 2023-01-23 22:38:50.414394: step: 288/77, loss: 0.001347896410152316 2023-01-23 22:38:51.713060: step: 292/77, loss: 0.005106473341584206 2023-01-23 22:38:53.011521: step: 296/77, loss: 0.0009395399829372764 2023-01-23 22:38:54.271251: step: 300/77, loss: 0.00232081301510334 2023-01-23 22:38:55.522567: step: 304/77, loss: 0.058472901582717896 2023-01-23 22:38:56.741939: step: 308/77, loss: 0.01651082932949066 2023-01-23 22:38:58.042653: step: 312/77, loss: 0.014799586497247219 2023-01-23 22:38:59.350611: step: 316/77, loss: 0.028199704363942146 2023-01-23 22:39:00.641440: step: 320/77, loss: 0.012422901578247547 2023-01-23 22:39:01.931747: step: 324/77, loss: 0.06933309882879257 2023-01-23 22:39:03.276937: step: 328/77, loss: 0.017022427171468735 2023-01-23 22:39:04.538606: step: 332/77, loss: 0.004448316525667906 2023-01-23 22:39:05.824461: step: 336/77, loss: 0.0006571123376488686 2023-01-23 22:39:07.085897: step: 340/77, loss: 0.011795529164373875 2023-01-23 22:39:08.414837: step: 344/77, loss: 0.03918634355068207 2023-01-23 22:39:09.699401: step: 348/77, loss: 0.00033447035821154714 2023-01-23 22:39:11.048779: step: 352/77, loss: 0.0402047336101532 2023-01-23 22:39:12.364868: step: 356/77, loss: 0.05152732506394386 2023-01-23 22:39:13.626622: step: 360/77, loss: 0.06429329514503479 2023-01-23 22:39:14.924385: step: 364/77, loss: 0.008129747584462166 2023-01-23 22:39:16.237551: step: 368/77, loss: 0.020626302808523178 2023-01-23 22:39:17.550185: step: 372/77, loss: 0.00031090303673408926 2023-01-23 22:39:18.869524: step: 376/77, loss: 0.013201514258980751 2023-01-23 22:39:20.151747: step: 380/77, loss: 0.0004937460180372 2023-01-23 22:39:21.442825: step: 384/77, loss: 0.017196908593177795 2023-01-23 22:39:22.795537: step: 388/77, loss: 0.08186633139848709 ================================================== Loss: 0.025 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Chinese: {'template': {'p': 0.971830985915493, 'r': 0.5267175572519084, 'f1': 0.6831683168316832}, 'slot': {'p': 0.5, 'r': 0.009033423667570008, 'f1': 0.01774622892635315}, 'combined': 0.01212366134572641, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Korean: {'template': {'p': 0.971830985915493, 'r': 0.5267175572519084, 'f1': 0.6831683168316832}, 'slot': {'p': 0.5, 'r': 0.009033423667570008, 'f1': 0.01774622892635315}, 'combined': 0.01212366134572641, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 
0.05179909351586346, 'epoch': 8} Test Russian: {'template': {'p': 0.9722222222222222, 'r': 0.5343511450381679, 'f1': 0.6896551724137931}, 'slot': {'p': 0.5, 'r': 0.009033423667570008, 'f1': 0.01774622892635315}, 'combined': 0.012238778569898724, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 9 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:41:05.376291: step: 4/77, loss: 0.0032547954469919205 2023-01-23 22:41:06.746261: step: 8/77, loss: 0.0007828651578165591 2023-01-23 22:41:07.994903: step: 12/77, loss: 0.009474514983594418 2023-01-23 22:41:09.288322: step: 16/77, loss: 0.026587102562189102 2023-01-23 22:41:10.627894: step: 20/77, loss: 0.016950704157352448 2023-01-23 22:41:11.977447: step: 24/77, loss: 0.004516863729804754 2023-01-23 22:41:13.280021: step: 28/77, loss: 0.0068637914955616 2023-01-23 22:41:14.565990: step: 32/77, loss: 
0.008322927169501781 2023-01-23 22:41:15.852561: step: 36/77, loss: 0.003309592604637146 2023-01-23 22:41:17.128363: step: 40/77, loss: 0.01013989932835102 2023-01-23 22:41:18.432222: step: 44/77, loss: 0.004847818054258823 2023-01-23 22:41:19.712298: step: 48/77, loss: 0.030007850378751755 2023-01-23 22:41:20.972383: step: 52/77, loss: 0.02581692300736904 2023-01-23 22:41:22.314428: step: 56/77, loss: 0.015968669205904007 2023-01-23 22:41:23.590694: step: 60/77, loss: 0.016311904415488243 2023-01-23 22:41:24.889775: step: 64/77, loss: 0.03854527324438095 2023-01-23 22:41:26.201385: step: 68/77, loss: 0.020433874800801277 2023-01-23 22:41:27.491447: step: 72/77, loss: 0.014859223738312721 2023-01-23 22:41:28.766178: step: 76/77, loss: 0.00025919050676748157 2023-01-23 22:41:30.009512: step: 80/77, loss: 0.02078193984925747 2023-01-23 22:41:31.251392: step: 84/77, loss: 0.014132988639175892 2023-01-23 22:41:32.547305: step: 88/77, loss: 0.012082832865417004 2023-01-23 22:41:33.783612: step: 92/77, loss: 0.0012587098171934485 2023-01-23 22:41:35.098682: step: 96/77, loss: 0.0025687245652079582 2023-01-23 22:41:36.367526: step: 100/77, loss: 0.031106337904930115 2023-01-23 22:41:37.642268: step: 104/77, loss: 0.012444172985851765 2023-01-23 22:41:38.906677: step: 108/77, loss: 0.01632830686867237 2023-01-23 22:41:40.211982: step: 112/77, loss: 0.001803108025342226 2023-01-23 22:41:41.510871: step: 116/77, loss: 0.00217154948040843 2023-01-23 22:41:42.765556: step: 120/77, loss: 0.027000855654478073 2023-01-23 22:41:44.083270: step: 124/77, loss: 0.01967952400445938 2023-01-23 22:41:45.402916: step: 128/77, loss: 0.012175975367426872 2023-01-23 22:41:46.722188: step: 132/77, loss: 0.0006821623537689447 2023-01-23 22:41:47.984959: step: 136/77, loss: 0.026813946664333344 2023-01-23 22:41:49.268918: step: 140/77, loss: 0.001025137840770185 2023-01-23 22:41:50.559326: step: 144/77, loss: 0.044895920902490616 2023-01-23 22:41:51.855682: step: 148/77, loss: 0.04364461451768875 2023-01-23 22:41:53.177901: step: 152/77, loss: 0.07442065328359604 2023-01-23 22:41:54.461910: step: 156/77, loss: 0.010177352465689182 2023-01-23 22:41:55.774050: step: 160/77, loss: 0.011536704376339912 2023-01-23 22:41:57.079605: step: 164/77, loss: 0.017821375280618668 2023-01-23 22:41:58.330460: step: 168/77, loss: 0.035762540996074677 2023-01-23 22:41:59.657349: step: 172/77, loss: 0.010910563170909882 2023-01-23 22:42:00.912726: step: 176/77, loss: 0.1267044097185135 2023-01-23 22:42:02.154907: step: 180/77, loss: 0.00936646293848753 2023-01-23 22:42:03.373816: step: 184/77, loss: 0.011003161780536175 2023-01-23 22:42:04.622236: step: 188/77, loss: 0.013133707456290722 2023-01-23 22:42:05.948489: step: 192/77, loss: 0.0047271656803786755 2023-01-23 22:42:07.243189: step: 196/77, loss: 0.027341635897755623 2023-01-23 22:42:08.560885: step: 200/77, loss: 0.02294914796948433 2023-01-23 22:42:09.881249: step: 204/77, loss: 0.020474707707762718 2023-01-23 22:42:11.172810: step: 208/77, loss: 0.0029907962307333946 2023-01-23 22:42:12.474428: step: 212/77, loss: 0.04676752910017967 2023-01-23 22:42:13.755169: step: 216/77, loss: 0.020993176847696304 2023-01-23 22:42:15.076778: step: 220/77, loss: 0.0022739083506166935 2023-01-23 22:42:16.322687: step: 224/77, loss: 0.004498450551182032 2023-01-23 22:42:17.615593: step: 228/77, loss: 0.014462015591561794 2023-01-23 22:42:18.904678: step: 232/77, loss: 0.011671274900436401 2023-01-23 22:42:20.198234: step: 236/77, loss: 0.004001801833510399 2023-01-23 22:42:21.501314: step: 
240/77, loss: 0.009535029530525208 2023-01-23 22:42:22.760449: step: 244/77, loss: 0.01722276769578457 2023-01-23 22:42:24.051337: step: 248/77, loss: 0.008600283414125443 2023-01-23 22:42:25.330924: step: 252/77, loss: 0.04984879493713379 2023-01-23 22:42:26.666559: step: 256/77, loss: 0.01410811860114336 2023-01-23 22:42:27.936398: step: 260/77, loss: 0.00032312856637872756 2023-01-23 22:42:29.202889: step: 264/77, loss: 0.0010134776821359992 2023-01-23 22:42:30.483303: step: 268/77, loss: 0.0083998404443264 2023-01-23 22:42:31.766943: step: 272/77, loss: 0.01889784447848797 2023-01-23 22:42:33.056065: step: 276/77, loss: 2.0189343558740802e-05 2023-01-23 22:42:34.342929: step: 280/77, loss: 0.010106510482728481 2023-01-23 22:42:35.665997: step: 284/77, loss: 0.01588715799152851 2023-01-23 22:42:36.953366: step: 288/77, loss: 0.0029431162402033806 2023-01-23 22:42:38.233308: step: 292/77, loss: 0.03170425444841385 2023-01-23 22:42:39.525123: step: 296/77, loss: 0.011810568161308765 2023-01-23 22:42:40.841638: step: 300/77, loss: 0.016197899356484413 2023-01-23 22:42:42.125426: step: 304/77, loss: 0.01205252856016159 2023-01-23 22:42:43.424502: step: 308/77, loss: 0.0011092599015682936 2023-01-23 22:42:44.735400: step: 312/77, loss: 0.0018312877509742975 2023-01-23 22:42:46.036658: step: 316/77, loss: 0.053495053201913834 2023-01-23 22:42:47.294744: step: 320/77, loss: 0.007342047058045864 2023-01-23 22:42:48.575241: step: 324/77, loss: 0.005907184444367886 2023-01-23 22:42:49.842461: step: 328/77, loss: 0.00495109474286437 2023-01-23 22:42:51.131849: step: 332/77, loss: 0.009428643621504307 2023-01-23 22:42:52.420728: step: 336/77, loss: 0.02674906887114048 2023-01-23 22:42:53.746767: step: 340/77, loss: 0.016145536676049232 2023-01-23 22:42:55.083188: step: 344/77, loss: 0.020640820264816284 2023-01-23 22:42:56.324407: step: 348/77, loss: 0.04335368797183037 2023-01-23 22:42:57.564738: step: 352/77, loss: 0.014391623437404633 2023-01-23 22:42:58.811537: step: 356/77, loss: 8.42200024635531e-05 2023-01-23 22:43:00.149855: step: 360/77, loss: 2.42486839852063e-05 2023-01-23 22:43:01.484375: step: 364/77, loss: 0.008449288085103035 2023-01-23 22:43:02.829146: step: 368/77, loss: 0.03136660158634186 2023-01-23 22:43:04.132392: step: 372/77, loss: 0.002738418523222208 2023-01-23 22:43:05.434603: step: 376/77, loss: 0.022241732105612755 2023-01-23 22:43:06.731934: step: 380/77, loss: 0.007063223980367184 2023-01-23 22:43:08.061990: step: 384/77, loss: 0.02287823148071766 2023-01-23 22:43:09.338313: step: 388/77, loss: 0.07854347676038742 ================================================== Loss: 0.017 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Chinese: {'template': {'p': 0.9594594594594594, 'r': 0.5419847328244275, 'f1': 0.6926829268292682}, 'slot': {'p': 0.42424242424242425, 'r': 0.012646793134598013, 'f1': 0.024561403508771933}, 'combined': 0.017013264869490802, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Korean: {'template': {'p': 0.9594594594594594, 'r': 0.5419847328244275, 'f1': 0.6926829268292682}, 'slot': {'p': 0.4375, 'r': 0.012646793134598013, 'f1': 0.024582967515364356}, 'combined': 0.017028201888691405, 'epoch': 9} Dev 
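Note on reading these scores: every 'f1' in this log is consistent with the harmonic mean of the logged 'p' and 'r', and every 'combined' value with the product of the template F1 and the slot F1. A minimal Python sketch under that assumption — the helper names below are illustrative and are not taken from train.py:

    def f1(p, r):
        # Harmonic mean of precision and recall; taken as 0.0 when p + r == 0.
        return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

    # Example, reproducing the epoch-9 "Test Korean" figures logged above:
    template_f1 = f1(0.9594594594594594, 0.5419847328244275)   # ~0.6926829268292682
    slot_f1 = f1(0.4375, 0.012646793134598013)                 # ~0.024582967515364356
    combined = template_f1 * slot_f1                           # ~0.017028201888691405

Under the same reading, an all-zero slot score (as in the Korean sample lines) forces 'combined' to 0.0 regardless of the template score. Separately, with --batch_size 10 and --accumulate_step 4 each optimizer update would correspond to an effective batch of roughly 40 examples, though the exact step accounting depends on how train.py counts steps.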
Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Russian: {'template': {'p': 0.9594594594594594, 'r': 0.5419847328244275, 'f1': 0.6926829268292682}, 'slot': {'p': 0.45454545454545453, 'r': 0.013550135501355014, 'f1': 0.02631578947368421}, 'combined': 0.018228498074454428, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 10 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:44:50.506853: step: 4/77, loss: 0.0334661565721035 2023-01-23 22:44:51.741897: step: 8/77, loss: 0.0053428830578923225 2023-01-23 22:44:53.032204: step: 12/77, loss: 0.012940764427185059 2023-01-23 22:44:54.339824: step: 16/77, loss: 0.03215727582573891 2023-01-23 22:44:55.649522: step: 20/77, loss: 0.01621071994304657 2023-01-23 22:44:56.930046: step: 24/77, loss: 
6.636667239945382e-05 2023-01-23 22:44:58.186703: step: 28/77, loss: 0.0008667551446706057 2023-01-23 22:44:59.527666: step: 32/77, loss: 0.027174003422260284 2023-01-23 22:45:00.803580: step: 36/77, loss: 0.010804384015500546 2023-01-23 22:45:02.130958: step: 40/77, loss: 0.0023620270658284426 2023-01-23 22:45:03.412795: step: 44/77, loss: 0.007145829498767853 2023-01-23 22:45:04.685373: step: 48/77, loss: 0.009835539385676384 2023-01-23 22:45:06.009998: step: 52/77, loss: 0.002078540623188019 2023-01-23 22:45:07.293404: step: 56/77, loss: 0.0011278409510850906 2023-01-23 22:45:08.591268: step: 60/77, loss: 0.019689617678523064 2023-01-23 22:45:09.813721: step: 64/77, loss: 0.02498197928071022 2023-01-23 22:45:11.134597: step: 68/77, loss: 0.0023460511583834887 2023-01-23 22:45:12.371601: step: 72/77, loss: 0.05652249976992607 2023-01-23 22:45:13.695734: step: 76/77, loss: 0.038843028247356415 2023-01-23 22:45:14.978445: step: 80/77, loss: 0.036456163972616196 2023-01-23 22:45:16.281731: step: 84/77, loss: 0.12923657894134521 2023-01-23 22:45:17.548801: step: 88/77, loss: 0.004103332292288542 2023-01-23 22:45:18.812160: step: 92/77, loss: 0.030966389924287796 2023-01-23 22:45:20.057653: step: 96/77, loss: 0.002118196338415146 2023-01-23 22:45:21.373805: step: 100/77, loss: 0.007329493761062622 2023-01-23 22:45:22.649610: step: 104/77, loss: 0.002037853468209505 2023-01-23 22:45:23.938353: step: 108/77, loss: 7.579373777844012e-05 2023-01-23 22:45:25.216567: step: 112/77, loss: 0.042532891035079956 2023-01-23 22:45:26.491870: step: 116/77, loss: 0.006116456817835569 2023-01-23 22:45:27.849157: step: 120/77, loss: 0.05537469685077667 2023-01-23 22:45:29.170167: step: 124/77, loss: 0.06468012183904648 2023-01-23 22:45:30.422842: step: 128/77, loss: 0.004855441860854626 2023-01-23 22:45:31.728014: step: 132/77, loss: 0.015735818073153496 2023-01-23 22:45:33.005228: step: 136/77, loss: 0.00012620292545761913 2023-01-23 22:45:34.273532: step: 140/77, loss: 0.007197881117463112 2023-01-23 22:45:35.626352: step: 144/77, loss: 7.069560524541885e-05 2023-01-23 22:45:36.931835: step: 148/77, loss: 0.00013055774616077542 2023-01-23 22:45:38.211603: step: 152/77, loss: 0.0017158358823508024 2023-01-23 22:45:39.467482: step: 156/77, loss: 0.034569624811410904 2023-01-23 22:45:40.734287: step: 160/77, loss: 0.0027393088676035404 2023-01-23 22:45:42.022694: step: 164/77, loss: 0.010289029218256474 2023-01-23 22:45:43.332817: step: 168/77, loss: 0.0027779859956353903 2023-01-23 22:45:44.604978: step: 172/77, loss: 0.0007916667382232845 2023-01-23 22:45:45.895287: step: 176/77, loss: 0.20995254814624786 2023-01-23 22:45:47.192772: step: 180/77, loss: 0.1505231112241745 2023-01-23 22:45:48.549331: step: 184/77, loss: 0.0010892642894759774 2023-01-23 22:45:49.855478: step: 188/77, loss: 0.012236889451742172 2023-01-23 22:45:51.170207: step: 192/77, loss: 0.005136884283274412 2023-01-23 22:45:52.450293: step: 196/77, loss: 0.03500115126371384 2023-01-23 22:45:53.722406: step: 200/77, loss: 0.024591337889432907 2023-01-23 22:45:55.020376: step: 204/77, loss: 0.018696283921599388 2023-01-23 22:45:56.288066: step: 208/77, loss: 0.010986441746354103 2023-01-23 22:45:57.613077: step: 212/77, loss: 0.08380329608917236 2023-01-23 22:45:58.909536: step: 216/77, loss: 0.02270100638270378 2023-01-23 22:46:00.173808: step: 220/77, loss: 0.00021239221678115427 2023-01-23 22:46:01.443145: step: 224/77, loss: 0.026787076145410538 2023-01-23 22:46:02.795444: step: 228/77, loss: 0.08860864490270615 2023-01-23 
22:46:04.133308: step: 232/77, loss: 0.00024117573047988117 2023-01-23 22:46:05.466418: step: 236/77, loss: 0.0002482231648173183 2023-01-23 22:46:06.806051: step: 240/77, loss: 0.00014384181122295558 2023-01-23 22:46:08.082870: step: 244/77, loss: 0.03744838014245033 2023-01-23 22:46:09.364797: step: 248/77, loss: 0.006567574106156826 2023-01-23 22:46:10.701954: step: 252/77, loss: 0.017972031608223915 2023-01-23 22:46:12.036714: step: 256/77, loss: 0.0023253969848155975 2023-01-23 22:46:13.313070: step: 260/77, loss: 0.02187388949096203 2023-01-23 22:46:14.665280: step: 264/77, loss: 0.01341575849801302 2023-01-23 22:46:15.957479: step: 268/77, loss: 0.004433206748217344 2023-01-23 22:46:17.216152: step: 272/77, loss: 0.007166629657149315 2023-01-23 22:46:18.520227: step: 276/77, loss: 0.017453910782933235 2023-01-23 22:46:19.868737: step: 280/77, loss: 0.04777868464589119 2023-01-23 22:46:21.188424: step: 284/77, loss: 0.01083751954138279 2023-01-23 22:46:22.447569: step: 288/77, loss: 0.007762902416288853 2023-01-23 22:46:23.738157: step: 292/77, loss: 0.03866489231586456 2023-01-23 22:46:24.997260: step: 296/77, loss: 0.016943011432886124 2023-01-23 22:46:26.311177: step: 300/77, loss: 0.0027338904328644276 2023-01-23 22:46:27.568121: step: 304/77, loss: 0.00044834212167188525 2023-01-23 22:46:28.834470: step: 308/77, loss: 0.15176521241664886 2023-01-23 22:46:30.121682: step: 312/77, loss: 0.0882883220911026 2023-01-23 22:46:31.448546: step: 316/77, loss: 0.04136144742369652 2023-01-23 22:46:32.755872: step: 320/77, loss: 0.03687924146652222 2023-01-23 22:46:34.014269: step: 324/77, loss: 0.0247676782310009 2023-01-23 22:46:35.331831: step: 328/77, loss: 0.004467879422008991 2023-01-23 22:46:36.658736: step: 332/77, loss: 0.08892552554607391 2023-01-23 22:46:37.938687: step: 336/77, loss: 0.043028660118579865 2023-01-23 22:46:39.246768: step: 340/77, loss: 0.004968172404915094 2023-01-23 22:46:40.516757: step: 344/77, loss: 0.0011426556156948209 2023-01-23 22:46:41.788845: step: 348/77, loss: 0.022866141051054 2023-01-23 22:46:43.088847: step: 352/77, loss: 0.02279716543853283 2023-01-23 22:46:44.395201: step: 356/77, loss: 0.08208740502595901 2023-01-23 22:46:45.724644: step: 360/77, loss: 0.0033330784644931555 2023-01-23 22:46:47.006697: step: 364/77, loss: 0.02521967515349388 2023-01-23 22:46:48.281197: step: 368/77, loss: 1.1281890692771412e-05 2023-01-23 22:46:49.588260: step: 372/77, loss: 0.005154577549546957 2023-01-23 22:46:50.931202: step: 376/77, loss: 0.0027619428001344204 2023-01-23 22:46:52.269282: step: 380/77, loss: 0.0311444029211998 2023-01-23 22:46:53.554482: step: 384/77, loss: 0.031260937452316284 2023-01-23 22:46:54.813373: step: 388/77, loss: 0.023465558886528015 ================================================== Loss: 0.025 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Chinese: {'template': {'p': 0.9857142857142858, 'r': 0.5267175572519084, 'f1': 0.6865671641791045}, 'slot': {'p': 0.5, 'r': 0.00993676603432701, 'f1': 0.01948627103631532}, 'combined': 0.013378633845828429, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Korean: {'template': {'p': 0.9855072463768116, 'r': 0.5190839694656488, 'f1': 0.68}, 
'slot': {'p': 0.5, 'r': 0.00993676603432701, 'f1': 0.01948627103631532}, 'combined': 0.01325066430469442, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Russian: {'template': {'p': 0.9857142857142858, 'r': 0.5267175572519084, 'f1': 0.6865671641791045}, 'slot': {'p': 0.4782608695652174, 'r': 0.00993676603432701, 'f1': 0.019469026548672566}, 'combined': 0.013366794346849821, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 11 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:48:37.807796: step: 4/77, loss: 0.01584513485431671 2023-01-23 22:48:39.126247: step: 8/77, loss: 0.011110379360616207 2023-01-23 22:48:40.416790: step: 12/77, loss: 0.007822764106094837 2023-01-23 
22:48:41.716008: step: 16/77, loss: 0.003719520289450884 2023-01-23 22:48:43.032588: step: 20/77, loss: 0.016872625797986984 2023-01-23 22:48:44.337247: step: 24/77, loss: 5.1765411626547575e-05 2023-01-23 22:48:45.619998: step: 28/77, loss: 0.035905055701732635 2023-01-23 22:48:46.901012: step: 32/77, loss: 0.012639195658266544 2023-01-23 22:48:48.204475: step: 36/77, loss: 0.003601525444537401 2023-01-23 22:48:49.579074: step: 40/77, loss: 0.013114494271576405 2023-01-23 22:48:50.858550: step: 44/77, loss: 0.012906393967568874 2023-01-23 22:48:52.178917: step: 48/77, loss: 0.00372703792527318 2023-01-23 22:48:53.527709: step: 52/77, loss: 0.006472621578723192 2023-01-23 22:48:54.818146: step: 56/77, loss: 8.66968184709549e-05 2023-01-23 22:48:56.096829: step: 60/77, loss: 0.0013746072072535753 2023-01-23 22:48:57.349054: step: 64/77, loss: 0.03838416934013367 2023-01-23 22:48:58.619017: step: 68/77, loss: 0.0019289760384708643 2023-01-23 22:48:59.916817: step: 72/77, loss: 0.002471780404448509 2023-01-23 22:49:01.205400: step: 76/77, loss: 0.026333516463637352 2023-01-23 22:49:02.454981: step: 80/77, loss: 0.012460384517908096 2023-01-23 22:49:03.765634: step: 84/77, loss: 0.04798956960439682 2023-01-23 22:49:05.106050: step: 88/77, loss: 0.0010338500142097473 2023-01-23 22:49:06.412723: step: 92/77, loss: 0.0032949044834822416 2023-01-23 22:49:07.723167: step: 96/77, loss: 0.016322335228323936 2023-01-23 22:49:09.029038: step: 100/77, loss: 0.0018908950733020902 2023-01-23 22:49:10.323891: step: 104/77, loss: 0.017116904258728027 2023-01-23 22:49:11.626158: step: 108/77, loss: 0.06026684492826462 2023-01-23 22:49:12.962991: step: 112/77, loss: 0.019831260666251183 2023-01-23 22:49:14.253065: step: 116/77, loss: 0.009897179901599884 2023-01-23 22:49:15.580043: step: 120/77, loss: 0.10649476200342178 2023-01-23 22:49:16.862976: step: 124/77, loss: 0.006906839553266764 2023-01-23 22:49:18.165977: step: 128/77, loss: 0.011276248842477798 2023-01-23 22:49:19.482124: step: 132/77, loss: 0.006750210653990507 2023-01-23 22:49:20.763752: step: 136/77, loss: 0.017680184915661812 2023-01-23 22:49:22.041505: step: 140/77, loss: 0.00014472060138359666 2023-01-23 22:49:23.355727: step: 144/77, loss: 0.013520104810595512 2023-01-23 22:49:24.655728: step: 148/77, loss: 0.012814250774681568 2023-01-23 22:49:25.965040: step: 152/77, loss: 0.011074498295783997 2023-01-23 22:49:27.252279: step: 156/77, loss: 0.050522495061159134 2023-01-23 22:49:28.563012: step: 160/77, loss: 0.0018916124245151877 2023-01-23 22:49:29.838889: step: 164/77, loss: 0.02445100247859955 2023-01-23 22:49:31.167375: step: 168/77, loss: 0.05349840223789215 2023-01-23 22:49:32.436137: step: 172/77, loss: 0.04895170032978058 2023-01-23 22:49:33.715710: step: 176/77, loss: 7.250769704114646e-05 2023-01-23 22:49:35.025064: step: 180/77, loss: 0.002005907241255045 2023-01-23 22:49:36.299974: step: 184/77, loss: 0.028574928641319275 2023-01-23 22:49:37.599505: step: 188/77, loss: 0.008724729530513287 2023-01-23 22:49:38.917105: step: 192/77, loss: 0.02186712622642517 2023-01-23 22:49:40.215787: step: 196/77, loss: 0.04871781915426254 2023-01-23 22:49:41.537337: step: 200/77, loss: 0.008077450096607208 2023-01-23 22:49:42.837136: step: 204/77, loss: 0.010768383741378784 2023-01-23 22:49:44.157241: step: 208/77, loss: 0.010555818676948547 2023-01-23 22:49:45.460727: step: 212/77, loss: 0.03762510418891907 2023-01-23 22:49:46.747958: step: 216/77, loss: 0.0065259141847491264 2023-01-23 22:49:48.054298: step: 220/77, loss: 
0.01817585900425911 2023-01-23 22:49:49.380534: step: 224/77, loss: 0.005752554163336754 2023-01-23 22:49:50.684542: step: 228/77, loss: 0.021643973886966705 2023-01-23 22:49:51.973230: step: 232/77, loss: 0.04298442602157593 2023-01-23 22:49:53.285161: step: 236/77, loss: 0.01501567754894495 2023-01-23 22:49:54.570248: step: 240/77, loss: 0.006192583125084639 2023-01-23 22:49:55.850431: step: 244/77, loss: 0.004610065370798111 2023-01-23 22:49:57.118770: step: 248/77, loss: 0.0006969093228690326 2023-01-23 22:49:58.405047: step: 252/77, loss: 0.07962727546691895 2023-01-23 22:49:59.676010: step: 256/77, loss: 0.01237800344824791 2023-01-23 22:50:01.014243: step: 260/77, loss: 0.027706127613782883 2023-01-23 22:50:02.304459: step: 264/77, loss: 0.00938791036605835 2023-01-23 22:50:03.594119: step: 268/77, loss: 0.024889472872018814 2023-01-23 22:50:04.861461: step: 272/77, loss: 0.04661480337381363 2023-01-23 22:50:06.113488: step: 276/77, loss: 0.0010684910230338573 2023-01-23 22:50:07.431174: step: 280/77, loss: 0.009528428316116333 2023-01-23 22:50:08.725139: step: 284/77, loss: 0.03483160585165024 2023-01-23 22:50:09.973552: step: 288/77, loss: 0.011898556724190712 2023-01-23 22:50:11.298743: step: 292/77, loss: 0.00047971383901312947 2023-01-23 22:50:12.566726: step: 296/77, loss: 0.061640918254852295 2023-01-23 22:50:13.888887: step: 300/77, loss: 0.0016488262917846441 2023-01-23 22:50:15.176191: step: 304/77, loss: 0.02181958593428135 2023-01-23 22:50:16.499100: step: 308/77, loss: 0.004883064888417721 2023-01-23 22:50:17.831067: step: 312/77, loss: 0.00666409358382225 2023-01-23 22:50:19.097327: step: 316/77, loss: 0.06552506983280182 2023-01-23 22:50:20.360808: step: 320/77, loss: 0.00029439933132380247 2023-01-23 22:50:21.675943: step: 324/77, loss: 0.0016701119020581245 2023-01-23 22:50:22.933112: step: 328/77, loss: 0.013529755175113678 2023-01-23 22:50:24.281986: step: 332/77, loss: 0.019527696073055267 2023-01-23 22:50:25.552307: step: 336/77, loss: 0.020140524953603745 2023-01-23 22:50:26.835043: step: 340/77, loss: 0.018858810886740685 2023-01-23 22:50:28.135212: step: 344/77, loss: 0.011388593353331089 2023-01-23 22:50:29.426221: step: 348/77, loss: 0.0008530689519830048 2023-01-23 22:50:30.731340: step: 352/77, loss: 0.002056154888123274 2023-01-23 22:50:32.050765: step: 356/77, loss: 0.009871527552604675 2023-01-23 22:50:33.328343: step: 360/77, loss: 0.0023899408988654613 2023-01-23 22:50:34.642135: step: 364/77, loss: 0.0003346440498717129 2023-01-23 22:50:35.942582: step: 368/77, loss: 0.0314149372279644 2023-01-23 22:50:37.241770: step: 372/77, loss: 0.005590423475950956 2023-01-23 22:50:38.518802: step: 376/77, loss: 0.005169984884560108 2023-01-23 22:50:39.821776: step: 380/77, loss: 0.03703976050019264 2023-01-23 22:50:41.153222: step: 384/77, loss: 0.0014688130468130112 2023-01-23 22:50:42.440526: step: 388/77, loss: 0.01273888349533081 ================================================== Loss: 0.017 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Chinese: {'template': {'p': 0.9259259259259259, 'r': 0.5725190839694656, 'f1': 0.7075471698113207}, 'slot': {'p': 0.4, 'r': 0.016260162601626018, 'f1': 0.03125000000000001}, 'combined': 0.022110849056603776, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Korean: {'template': {'p': 0.9240506329113924, 'r': 0.5572519083969466, 'f1': 0.6952380952380953}, 'slot': {'p': 0.38636363636363635, 'r': 0.015356820234869015, 'f1': 0.02953953084274544}, 'combined': 0.02053700715733731, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Russian: {'template': {'p': 0.9146341463414634, 'r': 0.5725190839694656, 'f1': 0.704225352112676}, 'slot': {'p': 0.3829787234042553, 'r': 0.016260162601626018, 'f1': 0.03119584055459273}, 'combined': 0.021968901799008962, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 12 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:52:25.199904: step: 4/77, loss: 
0.056859564036130905 2023-01-23 22:52:26.491221: step: 8/77, loss: 0.00018933211686089635 2023-01-23 22:52:27.782675: step: 12/77, loss: 0.0015684763202443719 2023-01-23 22:52:29.031275: step: 16/77, loss: 0.0037419775035232306 2023-01-23 22:52:30.306460: step: 20/77, loss: 0.00010413004201836884 2023-01-23 22:52:31.591889: step: 24/77, loss: 0.0036584739573299885 2023-01-23 22:52:32.871853: step: 28/77, loss: 0.006588386371731758 2023-01-23 22:52:34.147145: step: 32/77, loss: 0.012956037186086178 2023-01-23 22:52:35.484526: step: 36/77, loss: 0.05439409613609314 2023-01-23 22:52:36.812664: step: 40/77, loss: 0.08067908883094788 2023-01-23 22:52:38.133245: step: 44/77, loss: 0.028869925066828728 2023-01-23 22:52:39.403856: step: 48/77, loss: 0.0002178424911107868 2023-01-23 22:52:40.653328: step: 52/77, loss: 0.04830165579915047 2023-01-23 22:52:41.980302: step: 56/77, loss: 0.0012148318346589804 2023-01-23 22:52:43.304212: step: 60/77, loss: 0.0006402688450179994 2023-01-23 22:52:44.569807: step: 64/77, loss: 0.0008659854647703469 2023-01-23 22:52:45.864546: step: 68/77, loss: 0.03003714233636856 2023-01-23 22:52:47.175402: step: 72/77, loss: 0.02386688068509102 2023-01-23 22:52:48.468670: step: 76/77, loss: 0.042517803609371185 2023-01-23 22:52:49.679113: step: 80/77, loss: 0.0011577220866456628 2023-01-23 22:52:50.963930: step: 84/77, loss: 0.010854844003915787 2023-01-23 22:52:52.244497: step: 88/77, loss: 0.040769848972558975 2023-01-23 22:52:53.519189: step: 92/77, loss: 0.0005059882532805204 2023-01-23 22:52:54.807244: step: 96/77, loss: 0.016970816999673843 2023-01-23 22:52:56.058047: step: 100/77, loss: 0.02442902699112892 2023-01-23 22:52:57.320238: step: 104/77, loss: 0.0003359513357281685 2023-01-23 22:52:58.625911: step: 108/77, loss: 0.04959928244352341 2023-01-23 22:52:59.975491: step: 112/77, loss: 0.0037586591206490993 2023-01-23 22:53:01.310701: step: 116/77, loss: 0.015655245631933212 2023-01-23 22:53:02.633013: step: 120/77, loss: 0.007982890121638775 2023-01-23 22:53:03.922204: step: 124/77, loss: 0.02343090996146202 2023-01-23 22:53:05.227789: step: 128/77, loss: 0.007850190624594688 2023-01-23 22:53:06.523769: step: 132/77, loss: 0.0009188792901113629 2023-01-23 22:53:07.852407: step: 136/77, loss: 0.000135556620080024 2023-01-23 22:53:09.116371: step: 140/77, loss: 0.0010540832299739122 2023-01-23 22:53:10.421092: step: 144/77, loss: 0.0083523690700531 2023-01-23 22:53:11.731365: step: 148/77, loss: 0.008881242014467716 2023-01-23 22:53:13.030227: step: 152/77, loss: 0.009107242338359356 2023-01-23 22:53:14.289613: step: 156/77, loss: 0.003329481929540634 2023-01-23 22:53:15.556295: step: 160/77, loss: 0.0020151210483163595 2023-01-23 22:53:16.816840: step: 164/77, loss: 0.006416513584554195 2023-01-23 22:53:18.108183: step: 168/77, loss: 0.0237065888941288 2023-01-23 22:53:19.385891: step: 172/77, loss: 0.004142489284276962 2023-01-23 22:53:20.672865: step: 176/77, loss: 0.013825026340782642 2023-01-23 22:53:21.941635: step: 180/77, loss: 0.003928538877516985 2023-01-23 22:53:23.242602: step: 184/77, loss: 0.02468213438987732 2023-01-23 22:53:24.527186: step: 188/77, loss: 0.01477570179849863 2023-01-23 22:53:25.811881: step: 192/77, loss: 0.04188038408756256 2023-01-23 22:53:27.100279: step: 196/77, loss: 0.012145506218075752 2023-01-23 22:53:28.448026: step: 200/77, loss: 0.0013958922354504466 2023-01-23 22:53:29.751223: step: 204/77, loss: 0.01585123874247074 2023-01-23 22:53:31.060686: step: 208/77, loss: 0.012287406250834465 2023-01-23 22:53:32.318738: step: 
212/77, loss: 0.009149945341050625 2023-01-23 22:53:33.612755: step: 216/77, loss: 0.0019453726708889008 2023-01-23 22:53:34.943236: step: 220/77, loss: 0.0008188173524104059 2023-01-23 22:53:36.238725: step: 224/77, loss: 0.0005111164064146578 2023-01-23 22:53:37.582692: step: 228/77, loss: 0.0007848279201425612 2023-01-23 22:53:38.932880: step: 232/77, loss: 0.0243210569024086 2023-01-23 22:53:40.175852: step: 236/77, loss: 0.0005640236777253449 2023-01-23 22:53:41.487804: step: 240/77, loss: 0.0072174943052232265 2023-01-23 22:53:42.780559: step: 244/77, loss: 0.0010060467757284641 2023-01-23 22:53:44.021816: step: 248/77, loss: 0.01647023856639862 2023-01-23 22:53:45.310694: step: 252/77, loss: 0.012628314085304737 2023-01-23 22:53:46.624868: step: 256/77, loss: 0.0013272122014313936 2023-01-23 22:53:47.893742: step: 260/77, loss: 0.02216571941971779 2023-01-23 22:53:49.225200: step: 264/77, loss: 0.0006854430539533496 2023-01-23 22:53:50.509585: step: 268/77, loss: 0.009277165867388248 2023-01-23 22:53:51.793719: step: 272/77, loss: 0.010998756624758244 2023-01-23 22:53:53.112248: step: 276/77, loss: 0.0013080434873700142 2023-01-23 22:53:54.388848: step: 280/77, loss: 0.006445006933063269 2023-01-23 22:53:55.689404: step: 284/77, loss: 0.011819546110928059 2023-01-23 22:53:56.973326: step: 288/77, loss: 0.0968322679400444 2023-01-23 22:53:58.253520: step: 292/77, loss: 0.020905228331685066 2023-01-23 22:53:59.548306: step: 296/77, loss: 0.004556386265903711 2023-01-23 22:54:00.855365: step: 300/77, loss: 0.0011466899886727333 2023-01-23 22:54:02.181863: step: 304/77, loss: 0.03180212900042534 2023-01-23 22:54:03.491533: step: 308/77, loss: 0.0031903793569654226 2023-01-23 22:54:04.770561: step: 312/77, loss: 0.016256894916296005 2023-01-23 22:54:06.090745: step: 316/77, loss: 0.0044269743375480175 2023-01-23 22:54:07.399417: step: 320/77, loss: 0.0002046520821750164 2023-01-23 22:54:08.705019: step: 324/77, loss: 0.007200018502771854 2023-01-23 22:54:10.025873: step: 328/77, loss: 0.037378519773483276 2023-01-23 22:54:11.312783: step: 332/77, loss: 0.08489402383565903 2023-01-23 22:54:12.628921: step: 336/77, loss: 0.034434448927640915 2023-01-23 22:54:13.888336: step: 340/77, loss: 0.0072105564177036285 2023-01-23 22:54:15.233172: step: 344/77, loss: 0.00021478811686392874 2023-01-23 22:54:16.582855: step: 348/77, loss: 0.001809608656913042 2023-01-23 22:54:17.857592: step: 352/77, loss: 0.0011827481212094426 2023-01-23 22:54:19.101690: step: 356/77, loss: 0.012472348287701607 2023-01-23 22:54:20.435974: step: 360/77, loss: 0.0236879363656044 2023-01-23 22:54:21.779171: step: 364/77, loss: 0.014688440598547459 2023-01-23 22:54:23.061663: step: 368/77, loss: 0.0033161991741508245 2023-01-23 22:54:24.368909: step: 372/77, loss: 0.07218929380178452 2023-01-23 22:54:25.629884: step: 376/77, loss: 0.00468746293336153 2023-01-23 22:54:26.890149: step: 380/77, loss: 0.037831034511327744 2023-01-23 22:54:28.185313: step: 384/77, loss: 0.00019326545589137822 2023-01-23 22:54:29.528802: step: 388/77, loss: 0.0001353693223791197 ================================================== Loss: 0.015 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Chinese: {'template': {'p': 0.9726027397260274, 'r': 0.5419847328244275, 'f1': 0.696078431372549}, 'slot': {'p': 0.4375, 'r': 0.012646793134598013, 'f1': 0.024582967515364356}, 
'combined': 0.01711167346657715, 'epoch': 12} Dev Korean: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 12} Test Korean: {'template': {'p': 0.9722222222222222, 'r': 0.5343511450381679, 'f1': 0.6896551724137931}, 'slot': {'p': 0.4375, 'r': 0.012646793134598013, 'f1': 0.024582967515364356}, 'combined': 0.01695377070025128, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5419847328244275, 'f1': 0.696078431372549}, 'slot': {'p': 0.46875, 'r': 0.013550135501355014, 'f1': 0.02633889376646181}, 'combined': 0.018333935857046946, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 13 command: python train.py --model_name template --xlmr_model_name 
xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:56:12.796739: step: 4/77, loss: 0.0008186142658814788 2023-01-23 22:56:14.141016: step: 8/77, loss: 0.004512900486588478 2023-01-23 22:56:15.409058: step: 12/77, loss: 0.008835289627313614 2023-01-23 22:56:16.691962: step: 16/77, loss: 0.19354291260242462 2023-01-23 22:56:18.005061: step: 20/77, loss: 0.0005305854137986898 2023-01-23 22:56:19.329771: step: 24/77, loss: 0.001983162248507142 2023-01-23 22:56:20.662982: step: 28/77, loss: 0.01291932538151741 2023-01-23 22:56:21.943576: step: 32/77, loss: 0.0400528721511364 2023-01-23 22:56:23.246810: step: 36/77, loss: 0.0005998695851303637 2023-01-23 22:56:24.548963: step: 40/77, loss: 0.006728531792759895 2023-01-23 22:56:25.851258: step: 44/77, loss: 0.0033883636351674795 2023-01-23 22:56:27.145143: step: 48/77, loss: 0.1282605528831482 2023-01-23 22:56:28.473218: step: 52/77, loss: 0.0037158699706196785 2023-01-23 22:56:29.775447: step: 56/77, loss: 0.009680245071649551 2023-01-23 22:56:31.029067: step: 60/77, loss: 0.014932963997125626 2023-01-23 22:56:32.310669: step: 64/77, loss: 0.0074518099427223206 2023-01-23 22:56:33.579415: step: 68/77, loss: 0.013059078715741634 2023-01-23 22:56:34.904998: step: 72/77, loss: 0.0004891576245427132 2023-01-23 22:56:36.170510: step: 76/77, loss: 0.0008808997226879001 2023-01-23 22:56:37.465448: step: 80/77, loss: 0.007496487349271774 2023-01-23 22:56:38.773709: step: 84/77, loss: 0.4473806321620941 2023-01-23 22:56:40.063039: step: 88/77, loss: 0.022166451439261436 2023-01-23 22:56:41.366913: step: 92/77, loss: 0.07151596248149872 2023-01-23 22:56:42.650658: step: 96/77, loss: 0.009233084507286549 2023-01-23 22:56:43.941959: step: 100/77, loss: 0.004044557921588421 2023-01-23 22:56:45.240710: step: 104/77, loss: 0.009267174638807774 2023-01-23 22:56:46.571116: step: 108/77, loss: 0.0015090053202584386 2023-01-23 22:56:47.884255: step: 112/77, loss: 0.00926131196320057 2023-01-23 22:56:49.192189: step: 116/77, loss: 0.00021723141253460199 2023-01-23 22:56:50.460197: step: 120/77, loss: 0.01728326641023159 2023-01-23 22:56:51.693271: step: 124/77, loss: 0.0654454305768013 2023-01-23 22:56:52.962953: step: 128/77, loss: 0.015401189215481281 2023-01-23 22:56:54.205071: step: 132/77, loss: 0.0012254157336428761 2023-01-23 22:56:55.509643: step: 136/77, loss: 0.00106145441532135 2023-01-23 22:56:56.814762: step: 140/77, loss: 0.0007606762228533626 2023-01-23 22:56:58.134565: step: 144/77, loss: 0.00244477903470397 2023-01-23 22:56:59.368240: step: 148/77, loss: 0.040618762373924255 2023-01-23 22:57:00.658827: step: 152/77, loss: 0.002180933952331543 2023-01-23 22:57:01.951745: step: 156/77, loss: 0.00010296700929757208 2023-01-23 22:57:03.246871: step: 160/77, loss: 0.009789688512682915 2023-01-23 22:57:04.527740: step: 164/77, loss: 5.17836851940956e-05 2023-01-23 22:57:05.851521: step: 168/77, loss: 0.0018696163315325975 2023-01-23 22:57:07.173789: step: 172/77, loss: 0.002193837659433484 2023-01-23 22:57:08.438877: step: 176/77, loss: 0.050133366137742996 2023-01-23 22:57:09.708233: step: 180/77, loss: 0.008347946219146252 2023-01-23 22:57:11.010757: step: 184/77, loss: 0.0014724781503900886 2023-01-23 22:57:12.347678: step: 188/77, loss: 1.9858744053635746e-05 2023-01-23 22:57:13.688809: step: 192/77, loss: 0.04367201402783394 2023-01-23 22:57:14.998981: step: 196/77, loss: 0.007622504606842995 2023-01-23 22:57:16.328696: step: 
200/77, loss: 0.01916610449552536 2023-01-23 22:57:17.660623: step: 204/77, loss: 0.0003585350641515106 2023-01-23 22:57:18.963475: step: 208/77, loss: 0.0012512040557339787 2023-01-23 22:57:20.235144: step: 212/77, loss: 0.004894533194601536 2023-01-23 22:57:21.499973: step: 216/77, loss: 0.01604030281305313 2023-01-23 22:57:22.825457: step: 220/77, loss: 0.004095867276191711 2023-01-23 22:57:24.102601: step: 224/77, loss: 1.4710599316458683e-05 2023-01-23 22:57:25.391194: step: 228/77, loss: 0.020325489342212677 2023-01-23 22:57:26.673518: step: 232/77, loss: 0.0009979141177609563 2023-01-23 22:57:27.936292: step: 236/77, loss: 0.01780639961361885 2023-01-23 22:57:29.240173: step: 240/77, loss: 0.003619758877903223 2023-01-23 22:57:30.532564: step: 244/77, loss: 0.0006508044898509979 2023-01-23 22:57:31.900509: step: 248/77, loss: 0.0022863191552460194 2023-01-23 22:57:33.178922: step: 252/77, loss: 0.007784360088407993 2023-01-23 22:57:34.459955: step: 256/77, loss: 0.008496375754475594 2023-01-23 22:57:35.764994: step: 260/77, loss: 0.00044161375262774527 2023-01-23 22:57:37.074551: step: 264/77, loss: 0.009069936349987984 2023-01-23 22:57:38.381675: step: 268/77, loss: 0.0013018647441640496 2023-01-23 22:57:39.705024: step: 272/77, loss: 0.0032529844902455807 2023-01-23 22:57:41.015130: step: 276/77, loss: 0.008116625249385834 2023-01-23 22:57:42.313161: step: 280/77, loss: 0.00029886772972531617 2023-01-23 22:57:43.611875: step: 284/77, loss: 0.001512476010248065 2023-01-23 22:57:44.891547: step: 288/77, loss: 0.004851474426686764 2023-01-23 22:57:46.183580: step: 292/77, loss: 0.0005368001293390989 2023-01-23 22:57:47.472933: step: 296/77, loss: 0.04036924988031387 2023-01-23 22:57:48.785560: step: 300/77, loss: 0.005227272864431143 2023-01-23 22:57:50.100610: step: 304/77, loss: 0.004941250197589397 2023-01-23 22:57:51.373751: step: 308/77, loss: 0.014837159775197506 2023-01-23 22:57:52.665703: step: 312/77, loss: 0.027678687125444412 2023-01-23 22:57:53.930858: step: 316/77, loss: 0.0037269440945237875 2023-01-23 22:57:55.224073: step: 320/77, loss: 0.016393287107348442 2023-01-23 22:57:56.520904: step: 324/77, loss: 0.0006741977413184941 2023-01-23 22:57:57.858215: step: 328/77, loss: 0.004779040813446045 2023-01-23 22:57:59.153068: step: 332/77, loss: 0.04116032272577286 2023-01-23 22:58:00.487559: step: 336/77, loss: 0.006824977695941925 2023-01-23 22:58:01.783315: step: 340/77, loss: 0.01795623078942299 2023-01-23 22:58:03.146621: step: 344/77, loss: 0.00019160851661581546 2023-01-23 22:58:04.474392: step: 348/77, loss: 0.00015201720816548914 2023-01-23 22:58:05.724821: step: 352/77, loss: 0.0006054152618162334 2023-01-23 22:58:07.056682: step: 356/77, loss: 0.025011781603097916 2023-01-23 22:58:08.391932: step: 360/77, loss: 0.00023869529832154512 2023-01-23 22:58:09.686105: step: 364/77, loss: 0.04313571751117706 2023-01-23 22:58:10.991779: step: 368/77, loss: 3.720049426192418e-05 2023-01-23 22:58:12.267045: step: 372/77, loss: 1.5758212612126954e-05 2023-01-23 22:58:13.524436: step: 376/77, loss: 0.038922298699617386 2023-01-23 22:58:14.809922: step: 380/77, loss: 0.00173748389352113 2023-01-23 22:58:16.055557: step: 384/77, loss: 0.0016290738712996244 2023-01-23 22:58:17.387614: step: 388/77, loss: 0.0012468572240322828 ================================================== Loss: 0.018 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 
'combined': 0.05179909351586346, 'epoch': 13} Test Chinese: {'template': {'p': 0.9710144927536232, 'r': 0.5114503816793893, 'f1': 0.6699999999999999}, 'slot': {'p': 0.42857142857142855, 'r': 0.013550135501355014, 'f1': 0.02626970227670753}, 'combined': 0.017600700525394045, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Korean: {'template': {'p': 0.9714285714285714, 'r': 0.5190839694656488, 'f1': 0.6766169154228854}, 'slot': {'p': 0.4444444444444444, 'r': 0.014453477868112014, 'f1': 0.027996500437445317}, 'combined': 0.018942905768619712, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Russian: {'template': {'p': 0.9705882352941176, 'r': 0.5038167938931297, 'f1': 0.6633165829145728}, 'slot': {'p': 0.4, 'r': 0.012646793134598013, 'f1': 0.0245183887915937}, 'combined': 0.016263453871810895, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 13} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 
0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 14 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:00:00.277073: step: 4/77, loss: 0.0035647323820739985 2023-01-23 23:00:01.567251: step: 8/77, loss: 0.0007356871501542628 2023-01-23 23:00:02.837971: step: 12/77, loss: 0.0089479461312294 2023-01-23 23:00:04.139089: step: 16/77, loss: 0.006250545848160982 2023-01-23 23:00:05.404819: step: 20/77, loss: 0.0007717355620115995 2023-01-23 23:00:06.743296: step: 24/77, loss: 0.010335363447666168 2023-01-23 23:00:08.018238: step: 28/77, loss: 0.002224587369710207 2023-01-23 23:00:09.314784: step: 32/77, loss: 0.0004021697968710214 2023-01-23 23:00:10.610895: step: 36/77, loss: 0.002917732112109661 2023-01-23 23:00:11.914450: step: 40/77, loss: 0.022275084629654884 2023-01-23 23:00:13.189617: step: 44/77, loss: 0.01664186269044876 2023-01-23 23:00:14.501910: step: 48/77, loss: 0.0020927239675074816 2023-01-23 23:00:15.763018: step: 52/77, loss: 0.0007523642270825803 2023-01-23 23:00:17.051134: step: 56/77, loss: 0.024307558313012123 2023-01-23 23:00:18.368446: step: 60/77, loss: 0.003226712578907609 2023-01-23 23:00:19.661133: step: 64/77, loss: 0.008963115513324738 2023-01-23 23:00:20.916897: step: 68/77, loss: 0.010883467271924019 2023-01-23 23:00:22.181976: step: 72/77, loss: 0.00818499457091093 2023-01-23 23:00:23.502404: step: 76/77, loss: 0.004292977973818779 2023-01-23 23:00:24.763174: step: 80/77, loss: 0.016569361090660095 2023-01-23 23:00:26.040329: step: 84/77, loss: 7.611776527483016e-05 2023-01-23 23:00:27.341798: step: 88/77, loss: 0.0011977230897173285 2023-01-23 23:00:28.655523: step: 92/77, loss: 0.0015111572574824095 2023-01-23 23:00:30.001217: step: 96/77, loss: 0.0011353702284395695 2023-01-23 23:00:31.318374: step: 100/77, loss: 0.033886488527059555 2023-01-23 23:00:32.621920: step: 104/77, loss: 0.01745476759970188 2023-01-23 23:00:33.841364: step: 108/77, loss: 0.006723622791469097 2023-01-23 23:00:35.154127: step: 112/77, loss: 0.0001270908396691084 2023-01-23 23:00:36.444957: step: 116/77, loss: 0.0004910130519419909 2023-01-23 23:00:37.765671: step: 120/77, loss: 1.6201971448026597e-05 2023-01-23 23:00:39.062629: step: 124/77, loss: 0.0003250141453463584 2023-01-23 23:00:40.329741: step: 128/77, loss: 0.00020061932445969433 2023-01-23 23:00:41.592835: step: 132/77, loss: 0.00979650765657425 2023-01-23 23:00:42.923913: step: 136/77, loss: 0.0642743855714798 2023-01-23 23:00:44.210942: step: 140/77, loss: 0.00012315556523390114 2023-01-23 23:00:45.521400: step: 144/77, loss: 0.03417303040623665 2023-01-23 23:00:46.835479: step: 148/77, loss: 0.004369811620563269 2023-01-23 23:00:48.090697: step: 152/77, loss: 0.00010004561045207083 2023-01-23 23:00:49.344615: step: 156/77, loss: 0.006763361394405365 2023-01-23 23:00:50.652525: step: 160/77, loss: 0.00010431784903630614 2023-01-23 23:00:51.963277: step: 164/77, loss: 0.019510159268975258 2023-01-23 23:00:53.262635: step: 168/77, loss: 0.0006070785457268357 2023-01-23 23:00:54.566672: step: 172/77, loss: 9.138509631156921e-05 2023-01-23 23:00:55.852363: step: 176/77, loss: 0.003176189260557294 2023-01-23 23:00:57.168892: step: 180/77, loss: 0.00040366995381191373 2023-01-23 23:00:58.484884: step: 184/77, loss: 
0.028564022853970528 2023-01-23 23:00:59.762223: step: 188/77, loss: 0.060488320887088776 2023-01-23 23:01:01.118328: step: 192/77, loss: 0.0003345083969179541 2023-01-23 23:01:02.403803: step: 196/77, loss: 0.011636440642178059 2023-01-23 23:01:03.674557: step: 200/77, loss: 0.005325679667294025 2023-01-23 23:01:04.935594: step: 204/77, loss: 0.00031878415029495955 2023-01-23 23:01:06.207012: step: 208/77, loss: 0.07789537310600281 2023-01-23 23:01:07.518258: step: 212/77, loss: 6.297907384578139e-05 2023-01-23 23:01:08.808505: step: 216/77, loss: 0.010719101876020432 2023-01-23 23:01:10.075122: step: 220/77, loss: 0.012533249333500862 2023-01-23 23:01:11.348750: step: 224/77, loss: 0.0012144611682742834 2023-01-23 23:01:12.660076: step: 228/77, loss: 0.0013060077326372266 2023-01-23 23:01:13.932818: step: 232/77, loss: 0.11131002008914948 2023-01-23 23:01:15.205165: step: 236/77, loss: 0.0572662279009819 2023-01-23 23:01:16.482198: step: 240/77, loss: 0.005486391019076109 2023-01-23 23:01:17.814048: step: 244/77, loss: 0.001085981959477067 2023-01-23 23:01:19.133263: step: 248/77, loss: 0.005080692004412413 2023-01-23 23:01:20.480185: step: 252/77, loss: 0.029977142810821533 2023-01-23 23:01:21.806682: step: 256/77, loss: 0.04266020283102989 2023-01-23 23:01:23.050297: step: 260/77, loss: 0.0010956295300275087 2023-01-23 23:01:24.344281: step: 264/77, loss: 0.0037063530180603266 2023-01-23 23:01:25.672897: step: 268/77, loss: 0.005354553461074829 2023-01-23 23:01:26.986629: step: 272/77, loss: 0.015287532471120358 2023-01-23 23:01:28.255360: step: 276/77, loss: 0.02026659995317459 2023-01-23 23:01:29.605196: step: 280/77, loss: 0.004163270350545645 2023-01-23 23:01:30.941153: step: 284/77, loss: 0.010022885166108608 2023-01-23 23:01:32.214315: step: 288/77, loss: 0.02518465742468834 2023-01-23 23:01:33.519502: step: 292/77, loss: 0.08042170852422714 2023-01-23 23:01:34.774176: step: 296/77, loss: 5.863496699021198e-05 2023-01-23 23:01:36.040162: step: 300/77, loss: 0.00013342482270672917 2023-01-23 23:01:37.341470: step: 304/77, loss: 0.0158902145922184 2023-01-23 23:01:38.733963: step: 308/77, loss: 0.024475159123539925 2023-01-23 23:01:39.998427: step: 312/77, loss: 0.0015556125435978174 2023-01-23 23:01:41.281721: step: 316/77, loss: 9.906681225402281e-05 2023-01-23 23:01:42.590674: step: 320/77, loss: 5.0956117775058374e-05 2023-01-23 23:01:43.901837: step: 324/77, loss: 0.0007658183458261192 2023-01-23 23:01:45.229063: step: 328/77, loss: 2.353433046664577e-05 2023-01-23 23:01:46.484386: step: 332/77, loss: 8.520779374521226e-05 2023-01-23 23:01:47.769454: step: 336/77, loss: 0.08126899600028992 2023-01-23 23:01:49.034965: step: 340/77, loss: 0.006573853548616171 2023-01-23 23:01:50.318954: step: 344/77, loss: 0.03134193271398544 2023-01-23 23:01:51.602998: step: 348/77, loss: 0.01700267754495144 2023-01-23 23:01:52.951771: step: 352/77, loss: 0.00022600368538405746 2023-01-23 23:01:54.292001: step: 356/77, loss: 0.020432688295841217 2023-01-23 23:01:55.572360: step: 360/77, loss: 0.000203250179765746 2023-01-23 23:01:56.921462: step: 364/77, loss: 0.05017153173685074 2023-01-23 23:01:58.244759: step: 368/77, loss: 0.007926249876618385 2023-01-23 23:01:59.530794: step: 372/77, loss: 0.0001213687501149252 2023-01-23 23:02:00.826723: step: 376/77, loss: 0.012438084930181503 2023-01-23 23:02:02.102429: step: 380/77, loss: 0.04841241613030434 2023-01-23 23:02:03.391630: step: 384/77, loss: 0.0060639334842562675 2023-01-23 23:02:04.653844: step: 388/77, loss: 0.00711484719067812 
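Note on the evaluation blocks in this log: each language reports precision/recall/F1 at the template level and the slot level plus a 'combined' score. The logged numbers are consistent with F1 being the usual harmonic mean of precision and recall and with 'combined' being the product of template F1 and slot F1 (e.g. 0.7368421 * 0.0702988 = 0.0517991 for the Dev blocks). The snippet below is a minimal sketch added to illustrate that relationship; it is not taken from train.py, and the function names are hypothetical.

# Sketch (not from train.py): how the logged evaluation numbers appear to relate.
# Assumption: 'combined' = template F1 * slot F1, which matches every block in this log.

def f1(p: float, r: float) -> float:
    # Standard F1 (harmonic mean of precision and recall); 0.0 when both are zero.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template: dict, slot: dict) -> float:
    # Product of template F1 and slot F1, matching the 'combined' field in the log.
    return f1(template["p"], template["r"]) * f1(slot["p"], slot["r"])

# Check against the epoch-14 "Dev Chinese" block below.
template = {"p": 1.0, "r": 0.5833333333333334}
slot = {"p": 0.5, "r": 0.03780718336483932}
print(combined_score(template, slot))  # ~0.05179909351586346, as logged

Running this on the epoch-14 Dev Chinese values reproduces the logged combined score of ~0.0518; the same relation holds for the Test and Sample blocks.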
================================================== Loss: 0.014 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Chinese: {'template': {'p': 0.9558823529411765, 'r': 0.4961832061068702, 'f1': 0.6532663316582915}, 'slot': {'p': 0.5, 'r': 0.014453477868112014, 'f1': 0.028094820017559263}, 'combined': 0.018353400011470875, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Korean: {'template': {'p': 0.9402985074626866, 'r': 0.48091603053435117, 'f1': 0.6363636363636365}, 'slot': {'p': 0.48484848484848486, 'r': 0.014453477868112014, 'f1': 0.02807017543859649}, 'combined': 0.017862838915470497, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Russian: {'template': {'p': 0.9411764705882353, 'r': 0.48854961832061067, 'f1': 0.6432160804020101}, 'slot': {'p': 0.47058823529411764, 'r': 0.014453477868112014, 'f1': 0.028045574057843997}, 'combined': 0.018039364218110712, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 
0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 15 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:03:47.679415: step: 4/77, loss: 0.0002984795719385147 2023-01-23 23:03:48.974540: step: 8/77, loss: 5.2180635975673795e-05 2023-01-23 23:03:50.249844: step: 12/77, loss: 0.013850021176040173 2023-01-23 23:03:51.519633: step: 16/77, loss: 0.04257480800151825 2023-01-23 23:03:52.828623: step: 20/77, loss: 0.0013392434921115637 2023-01-23 23:03:54.143190: step: 24/77, loss: 0.014045661315321922 2023-01-23 23:03:55.467355: step: 28/77, loss: 0.001623183605261147 2023-01-23 23:03:56.718149: step: 32/77, loss: 0.004262380767613649 2023-01-23 23:03:58.028708: step: 36/77, loss: 0.03393692150712013 2023-01-23 23:03:59.227468: step: 40/77, loss: 0.0009387597674503922 2023-01-23 23:04:00.553747: step: 44/77, loss: 0.03857859969139099 2023-01-23 23:04:01.852871: step: 48/77, loss: 0.014586037024855614 2023-01-23 23:04:03.181906: step: 52/77, loss: 0.019439885392785072 2023-01-23 23:04:04.525527: step: 56/77, loss: 0.0005992836668156087 2023-01-23 23:04:05.795282: step: 60/77, loss: 0.00041433415026403964 2023-01-23 23:04:07.096044: step: 64/77, loss: 0.0048797884956002235 2023-01-23 23:04:08.360535: step: 68/77, loss: 0.0019731195643544197 2023-01-23 23:04:09.632475: step: 72/77, loss: 0.01246714685112238 2023-01-23 23:04:10.926192: step: 76/77, loss: 7.696136890444905e-05 2023-01-23 23:04:12.198929: step: 80/77, loss: 0.001765516703017056 2023-01-23 23:04:13.486221: step: 84/77, loss: 0.0006175404414534569 2023-01-23 23:04:14.852813: step: 88/77, loss: 1.5016041288617998e-05 2023-01-23 23:04:16.175336: step: 92/77, loss: 0.006689623463898897 2023-01-23 23:04:17.479193: step: 96/77, loss: 0.0002577479463070631 2023-01-23 23:04:18.788087: step: 100/77, loss: 0.0027033966034650803 2023-01-23 23:04:20.147743: step: 104/77, loss: 0.001462356187403202 2023-01-23 23:04:21.498566: step: 108/77, loss: 0.01454481203109026 2023-01-23 23:04:22.750425: step: 112/77, loss: 0.0029345231596380472 2023-01-23 23:04:24.061869: step: 116/77, loss: 0.009179867804050446 2023-01-23 23:04:25.378768: step: 120/77, loss: 0.00047651416389271617 2023-01-23 23:04:26.654753: step: 124/77, loss: 0.08397921919822693 2023-01-23 23:04:27.959839: step: 128/77, loss: 0.005399615503847599 2023-01-23 23:04:29.210199: step: 132/77, loss: 0.008599193766713142 2023-01-23 23:04:30.526218: step: 136/77, loss: 0.009753820486366749 2023-01-23 23:04:31.848265: step: 140/77, loss: 0.04863092303276062 2023-01-23 23:04:33.181634: step: 144/77, loss: 0.00037321558920666575 2023-01-23 23:04:34.461416: step: 148/77, loss: 0.00017475412460044026 2023-01-23 23:04:35.763559: step: 152/77, loss: 6.773445784347132e-05 2023-01-23 23:04:37.026849: step: 156/77, loss: 0.00019403685291763395 2023-01-23 23:04:38.380676: step: 160/77, loss: 6.706234125886112e-05 2023-01-23 23:04:39.639036: step: 164/77, loss: 0.0005820175283588469 2023-01-23 23:04:40.913219: step: 168/77, loss: 1.0372894394095056e-05 2023-01-23 
23:04:42.174238: step: 172/77, loss: 0.00033901131246238947 2023-01-23 23:04:43.474729: step: 176/77, loss: 0.00014996797835920006 2023-01-23 23:04:44.750085: step: 180/77, loss: 0.06107831001281738 2023-01-23 23:04:46.010377: step: 184/77, loss: 0.004911118187010288 2023-01-23 23:04:47.272156: step: 188/77, loss: 0.002654495183378458 2023-01-23 23:04:48.600038: step: 192/77, loss: 0.024621155112981796 2023-01-23 23:04:49.937248: step: 196/77, loss: 2.0832656446145847e-05 2023-01-23 23:04:51.265642: step: 200/77, loss: 0.04934433847665787 2023-01-23 23:04:52.553418: step: 204/77, loss: 0.0053009772673249245 2023-01-23 23:04:53.840184: step: 208/77, loss: 0.00010297299013473094 2023-01-23 23:04:55.161177: step: 212/77, loss: 1.9658375094877556e-05 2023-01-23 23:04:56.439138: step: 216/77, loss: 0.003173955949023366 2023-01-23 23:04:57.714368: step: 220/77, loss: 1.160690499091288e-05 2023-01-23 23:04:59.050229: step: 224/77, loss: 0.009171406738460064 2023-01-23 23:05:00.386157: step: 228/77, loss: 0.016685236245393753 2023-01-23 23:05:01.696687: step: 232/77, loss: 0.0008765912498347461 2023-01-23 23:05:03.010369: step: 236/77, loss: 0.009719951078295708 2023-01-23 23:05:04.230010: step: 240/77, loss: 0.05772913247346878 2023-01-23 23:05:05.539269: step: 244/77, loss: 6.63778992020525e-05 2023-01-23 23:05:06.807435: step: 248/77, loss: 0.007578597869724035 2023-01-23 23:05:08.196267: step: 252/77, loss: 0.0036200552713125944 2023-01-23 23:05:09.426399: step: 256/77, loss: 0.013005263172090054 2023-01-23 23:05:10.745711: step: 260/77, loss: 0.03981100022792816 2023-01-23 23:05:11.997794: step: 264/77, loss: 0.014181100763380527 2023-01-23 23:05:13.319004: step: 268/77, loss: 0.018954459577798843 2023-01-23 23:05:14.587148: step: 272/77, loss: 0.00011651184468064457 2023-01-23 23:05:15.879630: step: 276/77, loss: 0.05421111360192299 2023-01-23 23:05:17.183650: step: 280/77, loss: 0.00025143398670479655 2023-01-23 23:05:18.460898: step: 284/77, loss: 0.0014813337475061417 2023-01-23 23:05:19.763402: step: 288/77, loss: 4.686432657763362e-05 2023-01-23 23:05:21.069380: step: 292/77, loss: 0.031259432435035706 2023-01-23 23:05:22.406756: step: 296/77, loss: 0.03233766555786133 2023-01-23 23:05:23.646818: step: 300/77, loss: 2.2500371414935216e-06 2023-01-23 23:05:24.942154: step: 304/77, loss: 0.00013911757559981197 2023-01-23 23:05:26.261512: step: 308/77, loss: 0.000564001442398876 2023-01-23 23:05:27.576464: step: 312/77, loss: 2.8925347578478977e-05 2023-01-23 23:05:28.853515: step: 316/77, loss: 0.002963209990411997 2023-01-23 23:05:30.143260: step: 320/77, loss: 7.018641917966306e-05 2023-01-23 23:05:31.456863: step: 324/77, loss: 0.0008802684023976326 2023-01-23 23:05:32.795155: step: 328/77, loss: 0.0026326999068260193 2023-01-23 23:05:34.100683: step: 332/77, loss: 0.03058941289782524 2023-01-23 23:05:35.429887: step: 336/77, loss: 1.5420877389260568e-05 2023-01-23 23:05:36.703208: step: 340/77, loss: 0.05822722986340523 2023-01-23 23:05:37.997088: step: 344/77, loss: 0.009377855807542801 2023-01-23 23:05:39.250751: step: 348/77, loss: 0.05395232513546944 2023-01-23 23:05:40.604123: step: 352/77, loss: 0.007040033116936684 2023-01-23 23:05:41.939322: step: 356/77, loss: 0.00036163668846711516 2023-01-23 23:05:43.259433: step: 360/77, loss: 0.00044713931856676936 2023-01-23 23:05:44.525753: step: 364/77, loss: 0.009938497096300125 2023-01-23 23:05:45.804927: step: 368/77, loss: 0.0006480221054516733 2023-01-23 23:05:47.107079: step: 372/77, loss: 0.008452756330370903 2023-01-23 
23:05:48.391573: step: 376/77, loss: 0.021464571356773376 2023-01-23 23:05:49.637661: step: 380/77, loss: 0.00037464150227606297 2023-01-23 23:05:50.922737: step: 384/77, loss: 0.005581353325396776 2023-01-23 23:05:52.206350: step: 388/77, loss: 0.003710019402205944 ================================================== Loss: 0.011 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Chinese: {'template': {'p': 0.9701492537313433, 'r': 0.4961832061068702, 'f1': 0.6565656565656566}, 'slot': {'p': 0.48148148148148145, 'r': 0.011743450767841012, 'f1': 0.022927689594356263}, 'combined': 0.015053533572052092, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.48091603053435117, 'f1': 0.6428571428571428}, 'slot': {'p': 0.48148148148148145, 'r': 0.011743450767841012, 'f1': 0.022927689594356263}, 'combined': 0.01473922902494331, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Russian: {'template': {'p': 0.9848484848484849, 'r': 0.4961832061068702, 'f1': 0.6598984771573604}, 'slot': {'p': 0.48148148148148145, 'r': 0.011743450767841012, 'f1': 0.022927689594356263}, 'combined': 0.015129947448052355, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 
1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 16 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:07:35.057912: step: 4/77, loss: 0.0004463110235519707 2023-01-23 23:07:36.357098: step: 8/77, loss: 0.024274805560708046 2023-01-23 23:07:37.669663: step: 12/77, loss: 0.00424988241866231 2023-01-23 23:07:38.947479: step: 16/77, loss: 2.180452247557696e-05 2023-01-23 23:07:40.211258: step: 20/77, loss: 0.016084343194961548 2023-01-23 23:07:41.490277: step: 24/77, loss: 0.004575492814183235 2023-01-23 23:07:42.833678: step: 28/77, loss: 0.0007353266701102257 2023-01-23 23:07:44.089888: step: 32/77, loss: 1.3090648280922323e-05 2023-01-23 23:07:45.337845: step: 36/77, loss: 0.04062986001372337 2023-01-23 23:07:46.617955: step: 40/77, loss: 0.00453605130314827 2023-01-23 23:07:47.880586: step: 44/77, loss: 7.663781616429333e-06 2023-01-23 23:07:49.142722: step: 48/77, loss: 7.687980541959405e-06 2023-01-23 23:07:50.471905: step: 52/77, loss: 1.1422414900152944e-05 2023-01-23 23:07:51.732481: step: 56/77, loss: 0.01101649273186922 2023-01-23 23:07:53.024350: step: 60/77, loss: 0.00185338722076267 2023-01-23 23:07:54.331487: step: 64/77, loss: 0.005147712305188179 2023-01-23 23:07:55.675012: step: 68/77, loss: 0.02915601246058941 2023-01-23 23:07:56.954690: step: 72/77, loss: 0.0007723210146650672 2023-01-23 23:07:58.237724: step: 76/77, loss: 0.023317718878388405 2023-01-23 23:07:59.559812: step: 80/77, loss: 0.0010713314404711127 2023-01-23 23:08:00.875178: step: 84/77, loss: 0.00702679855749011 2023-01-23 23:08:02.151561: step: 88/77, loss: 0.00013156705244909972 2023-01-23 23:08:03.475573: step: 92/77, loss: 0.0008226665668189526 2023-01-23 23:08:04.745855: step: 96/77, loss: 0.003145791357383132 2023-01-23 23:08:05.976055: step: 100/77, loss: 0.0030092468950897455 2023-01-23 23:08:07.238025: step: 104/77, loss: 3.409211331018014e-06 2023-01-23 23:08:08.534936: step: 108/77, loss: 0.00950823538005352 2023-01-23 23:08:09.811350: step: 112/77, loss: 9.047974890563637e-05 2023-01-23 23:08:11.121001: step: 116/77, loss: 0.00112790591083467 2023-01-23 23:08:12.414970: step: 120/77, loss: 0.00042730997665785253 2023-01-23 23:08:13.723369: step: 124/77, loss: 0.04910646751523018 2023-01-23 23:08:15.031018: step: 128/77, loss: 0.0008218835573643446 2023-01-23 23:08:16.381761: step: 132/77, loss: 0.0006013006786815822 2023-01-23 23:08:17.664086: step: 136/77, loss: 0.012114128097891808 2023-01-23 23:08:19.008929: step: 140/77, loss: 1.411896118952427e-05 2023-01-23 23:08:20.354550: step: 144/77, loss: 0.06364770233631134 2023-01-23 23:08:21.641337: step: 148/77, loss: 0.002993387635797262 2023-01-23 23:08:22.884740: step: 152/77, loss: 2.0418836356839165e-05 2023-01-23 23:08:24.177597: step: 156/77, loss: 0.008779329247772694 2023-01-23 
23:08:25.514557: step: 160/77, loss: 0.00208136229775846 2023-01-23 23:08:26.849107: step: 164/77, loss: 0.00047932719462551177 2023-01-23 23:08:28.141345: step: 168/77, loss: 0.025327226147055626 2023-01-23 23:08:29.425737: step: 172/77, loss: 0.007658546324819326 2023-01-23 23:08:30.709956: step: 176/77, loss: 0.003932743798941374 2023-01-23 23:08:32.009285: step: 180/77, loss: 0.0012501177843660116 2023-01-23 23:08:33.308740: step: 184/77, loss: 0.004465624690055847 2023-01-23 23:08:34.591048: step: 188/77, loss: 0.006403674371540546 2023-01-23 23:08:35.864415: step: 192/77, loss: 0.013701226562261581 2023-01-23 23:08:37.079200: step: 196/77, loss: 0.0032587507739663124 2023-01-23 23:08:38.397492: step: 200/77, loss: 0.01083880104124546 2023-01-23 23:08:39.707585: step: 204/77, loss: 0.0682225227355957 2023-01-23 23:08:41.033191: step: 208/77, loss: 0.004877833183854818 2023-01-23 23:08:42.318466: step: 212/77, loss: 0.03905593231320381 2023-01-23 23:08:43.614271: step: 216/77, loss: 0.02148747816681862 2023-01-23 23:08:44.907031: step: 220/77, loss: 0.004947171080857515 2023-01-23 23:08:46.228150: step: 224/77, loss: 4.1722978494362906e-07 2023-01-23 23:08:47.557590: step: 228/77, loss: 0.00029864057432860136 2023-01-23 23:08:48.871713: step: 232/77, loss: 0.02274949662387371 2023-01-23 23:08:50.143805: step: 236/77, loss: 0.00946321152150631 2023-01-23 23:08:51.402673: step: 240/77, loss: 0.14353521168231964 2023-01-23 23:08:52.686965: step: 244/77, loss: 0.00017940827819984406 2023-01-23 23:08:53.990198: step: 248/77, loss: 1.54062745423289e-05 2023-01-23 23:08:55.312438: step: 252/77, loss: 2.4544437110307626e-05 2023-01-23 23:08:56.611181: step: 256/77, loss: 0.003466332098469138 2023-01-23 23:08:57.952795: step: 260/77, loss: 0.10752370208501816 2023-01-23 23:08:59.270755: step: 264/77, loss: 0.03569445386528969 2023-01-23 23:09:00.561428: step: 268/77, loss: 1.0468648724781815e-05 2023-01-23 23:09:01.850768: step: 272/77, loss: 0.028262851759791374 2023-01-23 23:09:03.159817: step: 276/77, loss: 0.00022459420142695308 2023-01-23 23:09:04.486782: step: 280/77, loss: 0.013898893259465694 2023-01-23 23:09:05.845529: step: 284/77, loss: 0.007969505153596401 2023-01-23 23:09:07.193884: step: 288/77, loss: 2.6130308469873853e-05 2023-01-23 23:09:08.509692: step: 292/77, loss: 4.822365372092463e-05 2023-01-23 23:09:09.824197: step: 296/77, loss: 0.002555176615715027 2023-01-23 23:09:11.106120: step: 300/77, loss: 5.701354530174285e-05 2023-01-23 23:09:12.420838: step: 304/77, loss: 0.005560105200856924 2023-01-23 23:09:13.720364: step: 308/77, loss: 0.04731611907482147 2023-01-23 23:09:14.985197: step: 312/77, loss: 7.124312833184376e-05 2023-01-23 23:09:16.283111: step: 316/77, loss: 0.0011542986612766981 2023-01-23 23:09:17.596928: step: 320/77, loss: 3.0384167985175736e-05 2023-01-23 23:09:18.818663: step: 324/77, loss: 0.002147078514099121 2023-01-23 23:09:20.138734: step: 328/77, loss: 0.04028966650366783 2023-01-23 23:09:21.494472: step: 332/77, loss: 3.333506901981309e-05 2023-01-23 23:09:22.806716: step: 336/77, loss: 0.0029658996500074863 2023-01-23 23:09:24.128330: step: 340/77, loss: 0.003935584332793951 2023-01-23 23:09:25.381610: step: 344/77, loss: 0.014070598408579826 2023-01-23 23:09:26.624161: step: 348/77, loss: 0.0002728747494984418 2023-01-23 23:09:27.910795: step: 352/77, loss: 0.04345415160059929 2023-01-23 23:09:29.222371: step: 356/77, loss: 0.011620284989476204 2023-01-23 23:09:30.542752: step: 360/77, loss: 0.010270928032696247 2023-01-23 23:09:31.844113: step: 
364/77, loss: 0.00018565382924862206 2023-01-23 23:09:33.118651: step: 368/77, loss: 0.0017443906981498003 2023-01-23 23:09:34.387762: step: 372/77, loss: 0.04768596962094307 2023-01-23 23:09:35.690255: step: 376/77, loss: 2.2521740902448073e-05 2023-01-23 23:09:37.016525: step: 380/77, loss: 0.03269355744123459 2023-01-23 23:09:38.297944: step: 384/77, loss: 0.041919857263565063 2023-01-23 23:09:39.601478: step: 388/77, loss: 0.032667119055986404 ================================================== Loss: 0.013 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Chinese: {'template': {'p': 0.9452054794520548, 'r': 0.5267175572519084, 'f1': 0.676470588235294}, 'slot': {'p': 0.4444444444444444, 'r': 0.014453477868112014, 'f1': 0.027996500437445317}, 'combined': 0.0189388091194483, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Korean: {'template': {'p': 0.9452054794520548, 'r': 0.5267175572519084, 'f1': 0.676470588235294}, 'slot': {'p': 0.45714285714285713, 'r': 0.014453477868112014, 'f1': 0.028021015761821366}, 'combined': 0.018955393015349747, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5343511450381679, 'f1': 0.6829268292682927}, 'slot': {'p': 0.45714285714285713, 'r': 0.014453477868112014, 'f1': 0.028021015761821366}, 'combined': 0.01913630344709752, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 
0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 17 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:11:22.613293: step: 4/77, loss: 0.054453346878290176 2023-01-23 23:11:23.912116: step: 8/77, loss: 0.002369248541072011 2023-01-23 23:11:25.212448: step: 12/77, loss: 0.00011414064647397026 2023-01-23 23:11:26.516959: step: 16/77, loss: 0.00033900514245033264 2023-01-23 23:11:27.795559: step: 20/77, loss: 6.442344601964578e-05 2023-01-23 23:11:29.058229: step: 24/77, loss: 0.001451255870051682 2023-01-23 23:11:30.348647: step: 28/77, loss: 5.508850790647557e-06 2023-01-23 23:11:31.623116: step: 32/77, loss: 1.4209333130565938e-05 2023-01-23 23:11:32.906914: step: 36/77, loss: 0.0001875080051831901 2023-01-23 23:11:34.196496: step: 40/77, loss: 0.00018672860460355878 2023-01-23 23:11:35.502361: step: 44/77, loss: 3.9590067899553105e-06 2023-01-23 23:11:36.806552: step: 48/77, loss: 2.7718328055925667e-05 2023-01-23 23:11:38.087830: step: 52/77, loss: 8.046550874496461e-07 2023-01-23 23:11:39.359581: step: 56/77, loss: 0.0021199325565248728 2023-01-23 23:11:40.656047: step: 60/77, loss: 0.008005029521882534 2023-01-23 23:11:41.932516: step: 64/77, loss: 0.0005227422225289047 2023-01-23 23:11:43.286112: step: 68/77, loss: 0.0992288812994957 2023-01-23 23:11:44.591897: step: 72/77, loss: 0.0037284833379089832 2023-01-23 23:11:45.873821: step: 76/77, loss: 0.0004825929645448923 2023-01-23 23:11:47.150000: step: 80/77, loss: 7.151671525207348e-06 2023-01-23 23:11:48.460646: step: 84/77, loss: 0.06846614927053452 2023-01-23 23:11:49.746474: step: 88/77, loss: 0.00017095819930545986 2023-01-23 23:11:51.030413: step: 92/77, loss: 0.009575593285262585 2023-01-23 23:11:52.309523: step: 96/77, loss: 0.0002829671429935843 2023-01-23 23:11:53.584130: step: 100/77, loss: 5.086255259811878e-05 2023-01-23 23:11:54.877246: step: 104/77, loss: 0.0008916123188100755 2023-01-23 23:11:56.160989: step: 108/77, loss: 0.06513987481594086 2023-01-23 23:11:57.450478: step: 112/77, loss: 2.262550151499454e-05 2023-01-23 23:11:58.757525: step: 116/77, loss: 0.0018423409201204777 2023-01-23 23:12:00.069086: step: 120/77, loss: 0.004341209307312965 2023-01-23 23:12:01.386336: step: 124/77, loss: 3.488311995170079e-05 2023-01-23 23:12:02.711947: step: 128/77, loss: 0.008629067800939083 2023-01-23 23:12:04.043183: step: 132/77, loss: 0.00028946733800694346 2023-01-23 23:12:05.315208: step: 136/77, loss: 0.05375664681196213 2023-01-23 23:12:06.633909: step: 140/77, loss: 0.028950396925210953 2023-01-23 23:12:07.935531: step: 
144/77, loss: 1.7149346604128368e-05 2023-01-23 23:12:09.244559: step: 148/77, loss: 0.009458029642701149 2023-01-23 23:12:10.558572: step: 152/77, loss: 0.004180568736046553 2023-01-23 23:12:11.856898: step: 156/77, loss: 0.006696035154163837 2023-01-23 23:12:13.149815: step: 160/77, loss: 0.001574444817379117 2023-01-23 23:12:14.406475: step: 164/77, loss: 0.00542917987331748 2023-01-23 23:12:15.742861: step: 168/77, loss: 0.0009257158963009715 2023-01-23 23:12:16.996769: step: 172/77, loss: 0.004280414432287216 2023-01-23 23:12:18.301566: step: 176/77, loss: 0.0005574374226853251 2023-01-23 23:12:19.629068: step: 180/77, loss: 5.240487735136412e-05 2023-01-23 23:12:20.924224: step: 184/77, loss: 0.005636123474687338 2023-01-23 23:12:22.201052: step: 188/77, loss: 0.00011814519530162215 2023-01-23 23:12:23.480314: step: 192/77, loss: 0.03577865660190582 2023-01-23 23:12:24.744970: step: 196/77, loss: 0.013659999705851078 2023-01-23 23:12:26.082186: step: 200/77, loss: 0.00022276055824477226 2023-01-23 23:12:27.368641: step: 204/77, loss: 0.06914380192756653 2023-01-23 23:12:28.639340: step: 208/77, loss: 3.636732435552403e-05 2023-01-23 23:12:29.923531: step: 212/77, loss: 0.00028344907332211733 2023-01-23 23:12:31.239362: step: 216/77, loss: 0.0017052993644028902 2023-01-23 23:12:32.510381: step: 220/77, loss: 0.0013186639407649636 2023-01-23 23:12:33.809457: step: 224/77, loss: 0.0039030585903674364 2023-01-23 23:12:35.125230: step: 228/77, loss: 0.025749292224645615 2023-01-23 23:12:36.421065: step: 232/77, loss: 0.0008861465030349791 2023-01-23 23:12:37.692322: step: 236/77, loss: 5.695230356650427e-05 2023-01-23 23:12:38.996744: step: 240/77, loss: 0.0024523527827113867 2023-01-23 23:12:40.267402: step: 244/77, loss: 1.264049751625862e-05 2023-01-23 23:12:41.535986: step: 248/77, loss: 0.01322510652244091 2023-01-23 23:12:42.861746: step: 252/77, loss: 0.0026189072523266077 2023-01-23 23:12:44.139408: step: 256/77, loss: 0.006667570676654577 2023-01-23 23:12:45.480070: step: 260/77, loss: 0.0010307086631655693 2023-01-23 23:12:46.762401: step: 264/77, loss: 0.022734124213457108 2023-01-23 23:12:48.068016: step: 268/77, loss: 0.001391223049722612 2023-01-23 23:12:49.379363: step: 272/77, loss: 0.010047761723399162 2023-01-23 23:12:50.643734: step: 276/77, loss: 0.0538487546145916 2023-01-23 23:12:51.907796: step: 280/77, loss: 0.03308132290840149 2023-01-23 23:12:53.235152: step: 284/77, loss: 0.004668292123824358 2023-01-23 23:12:54.537928: step: 288/77, loss: 0.012309453450143337 2023-01-23 23:12:55.849820: step: 292/77, loss: 0.000616499746683985 2023-01-23 23:12:57.187192: step: 296/77, loss: 0.0017074373317882419 2023-01-23 23:12:58.466328: step: 300/77, loss: 0.00016156738274730742 2023-01-23 23:12:59.731289: step: 304/77, loss: 0.001918491441756487 2023-01-23 23:13:00.995879: step: 308/77, loss: 0.000854568206705153 2023-01-23 23:13:02.289792: step: 312/77, loss: 0.00010927413677563891 2023-01-23 23:13:03.606346: step: 316/77, loss: 0.00046209630090743303 2023-01-23 23:13:04.941953: step: 320/77, loss: 0.00025831477250903845 2023-01-23 23:13:06.237335: step: 324/77, loss: 0.0002843450929503888 2023-01-23 23:13:07.527523: step: 328/77, loss: 0.0011308686807751656 2023-01-23 23:13:08.849590: step: 332/77, loss: 0.028110980987548828 2023-01-23 23:13:10.187522: step: 336/77, loss: 6.039168874849565e-05 2023-01-23 23:13:11.496495: step: 340/77, loss: 0.04042520746588707 2023-01-23 23:13:12.851398: step: 344/77, loss: 0.0008626186172477901 2023-01-23 23:13:14.141865: step: 348/77, 
loss: 0.007865481078624725 2023-01-23 23:13:15.475160: step: 352/77, loss: 6.5393810473324265e-06 2023-01-23 23:13:16.743840: step: 356/77, loss: 0.005444225389510393 2023-01-23 23:13:17.981216: step: 360/77, loss: 0.12471656501293182 2023-01-23 23:13:19.307943: step: 364/77, loss: 0.0909993126988411 2023-01-23 23:13:20.623982: step: 368/77, loss: 0.08299486339092255 2023-01-23 23:13:21.895112: step: 372/77, loss: 0.0007615904905833304 2023-01-23 23:13:23.163167: step: 376/77, loss: 0.004367351066321135 2023-01-23 23:13:24.430688: step: 380/77, loss: 0.0003982612688560039 2023-01-23 23:13:25.733668: step: 384/77, loss: 0.00010780211596284062 2023-01-23 23:13:27.101745: step: 388/77, loss: 0.00010356045822845772 ================================================== Loss: 0.012 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Chinese: {'template': {'p': 0.935064935064935, 'r': 0.549618320610687, 'f1': 0.6923076923076923}, 'slot': {'p': 0.48484848484848486, 'r': 0.014453477868112014, 'f1': 0.02807017543859649}, 'combined': 0.0194331983805668, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Korean: {'template': {'p': 0.922077922077922, 'r': 0.5419847328244275, 'f1': 0.6826923076923077}, 'slot': {'p': 0.4857142857142857, 'r': 0.015356820234869015, 'f1': 0.0297723292469352}, 'combined': 0.02032534015896538, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Russian: {'template': {'p': 0.9358974358974359, 'r': 0.5572519083969466, 'f1': 0.6985645933014354}, 'slot': {'p': 0.5, 'r': 0.015356820234869015, 'f1': 0.029798422436459242}, 'combined': 0.020816122850349516, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 
'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 18 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:15:10.031874: step: 4/77, loss: 0.0013607381843030453 2023-01-23 23:15:11.307641: step: 8/77, loss: 0.02796463668346405 2023-01-23 23:15:12.569028: step: 12/77, loss: 0.03240637853741646 2023-01-23 23:15:13.849667: step: 16/77, loss: 0.0004766408819705248 2023-01-23 23:15:15.046061: step: 20/77, loss: 0.007245106156915426 2023-01-23 23:15:16.361566: step: 24/77, loss: 0.005309689790010452 2023-01-23 23:15:17.624303: step: 28/77, loss: 0.022519662976264954 2023-01-23 23:15:18.933120: step: 32/77, loss: 0.012556134723126888 2023-01-23 23:15:20.240785: step: 36/77, loss: 6.95910130161792e-05 2023-01-23 23:15:21.559116: step: 40/77, loss: 0.0017030031885951757 2023-01-23 23:15:22.861095: step: 44/77, loss: 0.0013226622249931097 2023-01-23 23:15:24.116899: step: 48/77, loss: 0.0002576282131485641 2023-01-23 23:15:25.410033: step: 52/77, loss: 0.10945596545934677 2023-01-23 23:15:26.760293: step: 56/77, loss: 0.0019031423144042492 2023-01-23 23:15:28.061229: step: 60/77, loss: 7.376941357506439e-05 2023-01-23 23:15:29.374839: step: 64/77, loss: 8.989451998786535e-06 2023-01-23 23:15:30.677449: step: 68/77, loss: 0.00048225466161966324 2023-01-23 23:15:31.967967: step: 72/77, loss: 0.009790761396288872 2023-01-23 23:15:33.227935: step: 76/77, loss: 0.00016322605370078236 2023-01-23 23:15:34.522799: step: 80/77, loss: 3.763324275496416e-05 2023-01-23 23:15:35.848460: step: 84/77, loss: 0.000363578787073493 2023-01-23 23:15:37.165340: step: 88/77, loss: 0.024104459211230278 2023-01-23 23:15:38.493637: step: 92/77, loss: 0.001635014428757131 2023-01-23 23:15:39.783797: step: 96/77, loss: 0.008522514253854752 2023-01-23 23:15:41.090084: step: 100/77, loss: 0.0014053500490263104 2023-01-23 23:15:42.384827: step: 104/77, loss: 3.954611383960582e-05 2023-01-23 23:15:43.736799: step: 108/77, loss: 0.004524328745901585 2023-01-23 23:15:45.044551: step: 112/77, loss: 0.004729835316538811 2023-01-23 23:15:46.324853: step: 116/77, loss: 0.00045003817649558187 2023-01-23 23:15:47.630006: step: 120/77, loss: 7.829015521565452e-05 2023-01-23 23:15:48.931744: step: 124/77, loss: 9.200895146932453e-05 2023-01-23 23:15:50.243179: step: 128/77, loss: 0.0005236926954239607 
2023-01-23 23:15:51.529039: step: 132/77, loss: 0.0073389639146625996 2023-01-23 23:15:52.821659: step: 136/77, loss: 0.008800855837762356 2023-01-23 23:15:54.135006: step: 140/77, loss: 0.006390353199094534 2023-01-23 23:15:55.435548: step: 144/77, loss: 0.0022420105524361134 2023-01-23 23:15:56.669260: step: 148/77, loss: 0.003213587449863553 2023-01-23 23:15:57.941960: step: 152/77, loss: 4.092996823601425e-05 2023-01-23 23:15:59.204975: step: 156/77, loss: 0.016200561076402664 2023-01-23 23:16:00.484888: step: 160/77, loss: 0.0342121496796608 2023-01-23 23:16:01.785274: step: 164/77, loss: 0.0004409652901813388 2023-01-23 23:16:03.059840: step: 168/77, loss: 0.002289507072418928 2023-01-23 23:16:04.358109: step: 172/77, loss: 0.0008406225824728608 2023-01-23 23:16:05.685943: step: 176/77, loss: 0.0002802301896736026 2023-01-23 23:16:06.968755: step: 180/77, loss: 0.01646391674876213 2023-01-23 23:16:08.232682: step: 184/77, loss: 0.03260109946131706 2023-01-23 23:16:09.561785: step: 188/77, loss: 0.0011455873027443886 2023-01-23 23:16:10.879421: step: 192/77, loss: 7.002104393905029e-05 2023-01-23 23:16:12.125103: step: 196/77, loss: 0.0010831948602572083 2023-01-23 23:16:13.413707: step: 200/77, loss: 0.00020702120673377067 2023-01-23 23:16:14.681928: step: 204/77, loss: 0.0004199196700938046 2023-01-23 23:16:15.992498: step: 208/77, loss: 0.0051574683748185635 2023-01-23 23:16:17.230051: step: 212/77, loss: 9.940290328813717e-05 2023-01-23 23:16:18.536502: step: 216/77, loss: 0.00029683380853384733 2023-01-23 23:16:19.844940: step: 220/77, loss: 1.3445323929772712e-05 2023-01-23 23:16:21.189571: step: 224/77, loss: 0.0007331773522309959 2023-01-23 23:16:22.463699: step: 228/77, loss: 5.822964340040926e-06 2023-01-23 23:16:23.774436: step: 232/77, loss: 2.0315183064667508e-05 2023-01-23 23:16:25.094993: step: 236/77, loss: 0.00042941138963215053 2023-01-23 23:16:26.378357: step: 240/77, loss: 0.004983678925782442 2023-01-23 23:16:27.691475: step: 244/77, loss: 0.046360064297914505 2023-01-23 23:16:28.992906: step: 248/77, loss: 0.00013092627341393381 2023-01-23 23:16:30.313118: step: 252/77, loss: 3.65358755516354e-06 2023-01-23 23:16:31.620761: step: 256/77, loss: 0.004260228481143713 2023-01-23 23:16:32.891988: step: 260/77, loss: 0.0007137374486774206 2023-01-23 23:16:34.213031: step: 264/77, loss: 0.009653638117015362 2023-01-23 23:16:35.508780: step: 268/77, loss: 0.00011329995322739705 2023-01-23 23:16:36.846570: step: 272/77, loss: 2.1175910660531372e-05 2023-01-23 23:16:38.186715: step: 276/77, loss: 0.02434980869293213 2023-01-23 23:16:39.510287: step: 280/77, loss: 0.019267985597252846 2023-01-23 23:16:40.808836: step: 284/77, loss: 1.3849632523488253e-05 2023-01-23 23:16:42.101019: step: 288/77, loss: 0.0006705262931063771 2023-01-23 23:16:43.365727: step: 292/77, loss: 0.0005639860755763948 2023-01-23 23:16:44.644216: step: 296/77, loss: 0.004534238949418068 2023-01-23 23:16:45.912456: step: 300/77, loss: 0.01240801066160202 2023-01-23 23:16:47.203361: step: 304/77, loss: 0.0001623555872356519 2023-01-23 23:16:48.523600: step: 308/77, loss: 0.01382834929972887 2023-01-23 23:16:49.786645: step: 312/77, loss: 0.0010725526371970773 2023-01-23 23:16:51.062199: step: 316/77, loss: 0.09580781310796738 2023-01-23 23:16:52.378262: step: 320/77, loss: 0.0006804431322962046 2023-01-23 23:16:53.697560: step: 324/77, loss: 0.0001848753308877349 2023-01-23 23:16:55.015459: step: 328/77, loss: 0.000658830045722425 2023-01-23 23:16:56.365218: step: 332/77, loss: 0.015460480935871601 
2023-01-23 23:16:57.655411: step: 336/77, loss: 6.475405825767666e-06 2023-01-23 23:16:59.006026: step: 340/77, loss: 0.0004257145628798753 2023-01-23 23:17:00.325105: step: 344/77, loss: 1.8175755030824803e-05 2023-01-23 23:17:01.620836: step: 348/77, loss: 0.002383376006036997 2023-01-23 23:17:02.905382: step: 352/77, loss: 0.010072077624499798 2023-01-23 23:17:04.171874: step: 356/77, loss: 0.0045144869945943356 2023-01-23 23:17:05.464175: step: 360/77, loss: 0.06425094604492188 2023-01-23 23:17:06.757583: step: 364/77, loss: 0.0003105873183812946 2023-01-23 23:17:08.046038: step: 368/77, loss: 0.007175946142524481 2023-01-23 23:17:09.319475: step: 372/77, loss: 0.00176761404145509 2023-01-23 23:17:10.598199: step: 376/77, loss: 0.03691961616277695 2023-01-23 23:17:11.886128: step: 380/77, loss: 0.0033785656560212374 2023-01-23 23:17:13.159054: step: 384/77, loss: 0.028056461364030838 2023-01-23 23:17:14.501266: step: 388/77, loss: 1.9826911739073694e-05 ================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Chinese: {'template': {'p': 0.971830985915493, 'r': 0.5267175572519084, 'f1': 0.6831683168316832}, 'slot': {'p': 0.4666666666666667, 'r': 0.012646793134598013, 'f1': 0.024626209322779244}, 'combined': 0.016823845972987802, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Korean: {'template': {'p': 0.9577464788732394, 'r': 0.5190839694656488, 'f1': 0.6732673267326732}, 'slot': {'p': 0.4666666666666667, 'r': 0.012646793134598013, 'f1': 0.024626209322779244}, 'combined': 0.016580022118306816, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Russian: {'template': {'p': 0.9583333333333334, 'r': 0.5267175572519084, 'f1': 0.6798029556650246}, 'slot': {'p': 0.4666666666666667, 'r': 0.012646793134598013, 'f1': 0.024626209322779244}, 'combined': 0.016740969884450913, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 18} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': 
{'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 19 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:18:57.364778: step: 4/77, loss: 1.1375072972441558e-05 2023-01-23 23:18:58.619445: step: 8/77, loss: 0.11558934301137924 2023-01-23 23:18:59.916772: step: 12/77, loss: 0.026156434789299965 2023-01-23 23:19:01.238172: step: 16/77, loss: 6.465530896093696e-05 2023-01-23 23:19:02.535058: step: 20/77, loss: 5.3221036068862304e-05 2023-01-23 23:19:03.822332: step: 24/77, loss: 8.60507643665187e-05 2023-01-23 23:19:05.121999: step: 28/77, loss: 2.8975300665479153e-05 2023-01-23 23:19:06.424979: step: 32/77, loss: 0.039114248007535934 2023-01-23 23:19:07.694673: step: 36/77, loss: 0.00038312008837237954 2023-01-23 23:19:08.997215: step: 40/77, loss: 0.13180230557918549 2023-01-23 23:19:10.289402: step: 44/77, loss: 0.000846332055516541 2023-01-23 23:19:11.589858: step: 48/77, loss: 0.02761784754693508 2023-01-23 23:19:12.888547: step: 52/77, loss: 9.444686293136328e-05 2023-01-23 23:19:14.214101: step: 56/77, loss: 6.0827398556284606e-05 2023-01-23 23:19:15.553475: step: 60/77, loss: 0.001431214390322566 2023-01-23 23:19:16.876119: step: 64/77, loss: 0.001511475071310997 2023-01-23 23:19:18.165627: step: 68/77, loss: 0.021096883341670036 2023-01-23 23:19:19.403280: step: 72/77, loss: 0.08325263112783432 2023-01-23 23:19:20.741926: step: 76/77, loss: 0.000645479594822973 2023-01-23 23:19:22.057297: step: 80/77, loss: 0.0013617220101878047 2023-01-23 23:19:23.332072: step: 84/77, loss: 0.0008968734182417393 2023-01-23 23:19:24.635744: step: 88/77, loss: 3.4381380828563124e-05 2023-01-23 23:19:25.970650: step: 92/77, loss: 4.9660422519082204e-05 2023-01-23 23:19:27.271489: step: 96/77, loss: 3.6714936868520454e-05 2023-01-23 23:19:28.558176: step: 100/77, loss: 0.0014671917306259274 2023-01-23 23:19:29.844138: step: 104/77, loss: 3.5135267353325617e-06 2023-01-23 23:19:31.111835: step: 108/77, loss: 1.618326859897934e-05 2023-01-23 23:19:32.411980: step: 112/77, loss: 9.728290024213493e-06 
2023-01-23 23:19:33.690696: step: 116/77, loss: 0.0003774128563236445 2023-01-23 23:19:35.012080: step: 120/77, loss: 4.9582966312300414e-05 2023-01-23 23:19:36.305450: step: 124/77, loss: 0.014143971726298332 2023-01-23 23:19:37.624964: step: 128/77, loss: 6.825715536251664e-05 2023-01-23 23:19:38.975689: step: 132/77, loss: 0.00046930837561376393 2023-01-23 23:19:40.233467: step: 136/77, loss: 0.03473134711384773 2023-01-23 23:19:41.526781: step: 140/77, loss: 5.870778841199353e-05 2023-01-23 23:19:42.805471: step: 144/77, loss: 0.00018723284301813692 2023-01-23 23:19:44.107219: step: 148/77, loss: 0.0039024415891617537 2023-01-23 23:19:45.395605: step: 152/77, loss: 0.016148289665579796 2023-01-23 23:19:46.657251: step: 156/77, loss: 9.999565008911304e-06 2023-01-23 23:19:47.901600: step: 160/77, loss: 1.606312707735924e-06 2023-01-23 23:19:49.222879: step: 164/77, loss: 0.001128826173953712 2023-01-23 23:19:50.523997: step: 168/77, loss: 0.0005516837118193507 2023-01-23 23:19:51.799237: step: 172/77, loss: 0.0003651257138699293 2023-01-23 23:19:53.104488: step: 176/77, loss: 0.0001059175847331062 2023-01-23 23:19:54.372571: step: 180/77, loss: 0.0007901396602392197 2023-01-23 23:19:55.621619: step: 184/77, loss: 3.967192242271267e-05 2023-01-23 23:19:56.914343: step: 188/77, loss: 0.0005969982594251633 2023-01-23 23:19:58.241290: step: 192/77, loss: 4.7730307414894924e-05 2023-01-23 23:19:59.572265: step: 196/77, loss: 3.742340413737111e-05 2023-01-23 23:20:00.846511: step: 200/77, loss: 0.00013582775136455894 2023-01-23 23:20:02.151128: step: 204/77, loss: 0.0011125564342364669 2023-01-23 23:20:03.439686: step: 208/77, loss: 0.0021483588498085737 2023-01-23 23:20:04.770581: step: 212/77, loss: 0.0002482504933141172 2023-01-23 23:20:06.080926: step: 216/77, loss: 3.252530223107897e-05 2023-01-23 23:20:07.409032: step: 220/77, loss: 0.03862406313419342 2023-01-23 23:20:08.744792: step: 224/77, loss: 0.03803897649049759 2023-01-23 23:20:10.011065: step: 228/77, loss: 0.0027629907708615065 2023-01-23 23:20:11.329607: step: 232/77, loss: 0.022126782685518265 2023-01-23 23:20:12.573258: step: 236/77, loss: 7.108498721208889e-06 2023-01-23 23:20:13.823034: step: 240/77, loss: 1.3698463590117171e-05 2023-01-23 23:20:15.132145: step: 244/77, loss: 0.0001432388526154682 2023-01-23 23:20:16.402796: step: 248/77, loss: 0.03477947786450386 2023-01-23 23:20:17.713877: step: 252/77, loss: 0.037318840622901917 2023-01-23 23:20:19.046932: step: 256/77, loss: 0.00020358621259219944 2023-01-23 23:20:20.343509: step: 260/77, loss: 0.0005873471382074058 2023-01-23 23:20:21.614137: step: 264/77, loss: 0.003738407976925373 2023-01-23 23:20:22.928132: step: 268/77, loss: 0.007683799136430025 2023-01-23 23:20:24.225165: step: 272/77, loss: 0.001356719876639545 2023-01-23 23:20:25.524384: step: 276/77, loss: 0.00033869032631628215 2023-01-23 23:20:26.820149: step: 280/77, loss: 0.00492723798379302 2023-01-23 23:20:28.138112: step: 284/77, loss: 0.0012603984214365482 2023-01-23 23:20:29.426070: step: 288/77, loss: 0.0006752713234163821 2023-01-23 23:20:30.756040: step: 292/77, loss: 3.044335426238831e-05 2023-01-23 23:20:32.047348: step: 296/77, loss: 4.2869993194472045e-05 2023-01-23 23:20:33.389462: step: 300/77, loss: 0.01467337179929018 2023-01-23 23:20:34.676237: step: 304/77, loss: 0.0013566524721682072 2023-01-23 23:20:35.990490: step: 308/77, loss: 0.00900754053145647 2023-01-23 23:20:37.305753: step: 312/77, loss: 0.00014935439685359597 2023-01-23 23:20:38.665902: step: 316/77, loss: 
0.00011078764509875327 2023-01-23 23:20:39.946132: step: 320/77, loss: 0.0012865741737186909 2023-01-23 23:20:41.302564: step: 324/77, loss: 9.888896602205932e-05 2023-01-23 23:20:42.611043: step: 328/77, loss: 0.0003299658128526062 2023-01-23 23:20:43.899377: step: 332/77, loss: 0.017302073538303375 2023-01-23 23:20:45.216954: step: 336/77, loss: 0.06188048794865608 2023-01-23 23:20:46.545881: step: 340/77, loss: 3.5314424167154357e-06 2023-01-23 23:20:47.842001: step: 344/77, loss: 0.007390220183879137 2023-01-23 23:20:49.096750: step: 348/77, loss: 0.0023874423932284117 2023-01-23 23:20:50.410508: step: 352/77, loss: 0.004484446253627539 2023-01-23 23:20:51.680414: step: 356/77, loss: 0.00024538126308470964 2023-01-23 23:20:53.025420: step: 360/77, loss: 0.003874297020956874 2023-01-23 23:20:54.320942: step: 364/77, loss: 0.012899035587906837 2023-01-23 23:20:55.631225: step: 368/77, loss: 4.1917381167877465e-05 2023-01-23 23:20:56.914439: step: 372/77, loss: 0.0030052876099944115 2023-01-23 23:20:58.233332: step: 376/77, loss: 0.0003932855906896293 2023-01-23 23:20:59.564710: step: 380/77, loss: 0.0002435240603517741 2023-01-23 23:21:00.852231: step: 384/77, loss: 0.0325603261590004 2023-01-23 23:21:02.125227: step: 388/77, loss: 0.03924199938774109 ================================================== Loss: 0.010 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5343511450381679, 'f1': 0.6829268292682927}, 'slot': {'p': 0.4186046511627907, 'r': 0.016260162601626018, 'f1': 0.03130434782608696}, 'combined': 0.02137857900318134, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5343511450381679, 'f1': 0.6829268292682927}, 'slot': {'p': 0.4146341463414634, 'r': 0.015356820234869015, 'f1': 0.029616724738675958}, 'combined': 0.02022605591909578, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Russian: {'template': {'p': 0.96, 'r': 0.549618320610687, 'f1': 0.6990291262135923}, 'slot': {'p': 0.43902439024390244, 'r': 0.016260162601626018, 'f1': 0.03135888501742161}, 'combined': 0.021920773992760736, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 19} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 
0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 20 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:22:44.986455: step: 4/77, loss: 0.00025785062462091446 2023-01-23 23:22:46.231681: step: 8/77, loss: 0.0807093009352684 2023-01-23 23:22:47.555687: step: 12/77, loss: 9.079035953618586e-05 2023-01-23 23:22:48.830995: step: 16/77, loss: 0.003588865976780653 2023-01-23 23:22:50.173075: step: 20/77, loss: 7.154016202548519e-05 2023-01-23 23:22:51.460589: step: 24/77, loss: 0.00013810431119054556 2023-01-23 23:22:52.746155: step: 28/77, loss: 0.001978781074285507 2023-01-23 23:22:54.052597: step: 32/77, loss: 3.17572194035165e-05 2023-01-23 23:22:55.366259: step: 36/77, loss: 0.000227707700105384 2023-01-23 23:22:56.658552: step: 40/77, loss: 0.0004038464685436338 2023-01-23 23:22:57.929265: step: 44/77, loss: 1.2348644304438494e-05 2023-01-23 23:22:59.266106: step: 48/77, loss: 0.00803268514573574 2023-01-23 23:23:00.544405: step: 52/77, loss: 0.013548798859119415 2023-01-23 23:23:01.822808: step: 56/77, loss: 0.0005574793322011828 2023-01-23 23:23:03.109726: step: 60/77, loss: 0.000170299390447326 2023-01-23 23:23:04.378122: step: 64/77, loss: 0.005198408383876085 2023-01-23 23:23:05.718180: step: 68/77, loss: 0.009746159426867962 2023-01-23 23:23:06.967201: step: 72/77, loss: 0.0016452963463962078 2023-01-23 23:23:08.245476: step: 76/77, loss: 0.0001234847295563668 2023-01-23 23:23:09.571114: step: 80/77, loss: 0.0015508176293224096 2023-01-23 23:23:10.887002: step: 84/77, loss: 2.2406420612242073e-05 2023-01-23 23:23:12.194431: step: 88/77, loss: 0.004509768448770046 2023-01-23 23:23:13.476691: step: 92/77, loss: 0.00036657514283433557 2023-01-23 23:23:14.757787: step: 96/77, loss: 7.71563354646787e-05 
2023-01-23 23:23:16.025464: step: 100/77, loss: 0.005974154453724623 2023-01-23 23:23:17.323110: step: 104/77, loss: 8.223913027904928e-06 2023-01-23 23:23:18.568496: step: 108/77, loss: 0.0004409156972542405 2023-01-23 23:23:19.857301: step: 112/77, loss: 0.000953411974478513 2023-01-23 23:23:21.146842: step: 116/77, loss: 0.0015970554668456316 2023-01-23 23:23:22.448427: step: 120/77, loss: 0.003411682788282633 2023-01-23 23:23:23.747829: step: 124/77, loss: 0.053259797394275665 2023-01-23 23:23:25.033326: step: 128/77, loss: 0.00028643987025134265 2023-01-23 23:23:26.331258: step: 132/77, loss: 0.015228739939630032 2023-01-23 23:23:27.630904: step: 136/77, loss: 0.0001034773958963342 2023-01-23 23:23:28.944642: step: 140/77, loss: 0.0046743834391236305 2023-01-23 23:23:30.281199: step: 144/77, loss: 0.029333055019378662 2023-01-23 23:23:31.583161: step: 148/77, loss: 1.1347108738846146e-05 2023-01-23 23:23:32.933259: step: 152/77, loss: 0.0005708672106266022 2023-01-23 23:23:34.257430: step: 156/77, loss: 5.751201115344884e-06 2023-01-23 23:23:35.543318: step: 160/77, loss: 4.07801526307594e-05 2023-01-23 23:23:36.828808: step: 164/77, loss: 0.0014126194873824716 2023-01-23 23:23:38.126292: step: 168/77, loss: 0.00010916890460066497 2023-01-23 23:23:39.417479: step: 172/77, loss: 0.09491101652383804 2023-01-23 23:23:40.768966: step: 176/77, loss: 6.833251245552674e-05 2023-01-23 23:23:42.127891: step: 180/77, loss: 0.041619233787059784 2023-01-23 23:23:43.462690: step: 184/77, loss: 1.1870360140164848e-05 2023-01-23 23:23:44.771880: step: 188/77, loss: 0.0008768976549617946 2023-01-23 23:23:46.086628: step: 192/77, loss: 0.0011280208127573133 2023-01-23 23:23:47.418523: step: 196/77, loss: 0.0035699442960321903 2023-01-23 23:23:48.660946: step: 200/77, loss: 0.001058831694535911 2023-01-23 23:23:49.935160: step: 204/77, loss: 0.01659216731786728 2023-01-23 23:23:51.207304: step: 208/77, loss: 0.0856424868106842 2023-01-23 23:23:52.493329: step: 212/77, loss: 0.0002852054312825203 2023-01-23 23:23:53.773199: step: 216/77, loss: 3.439107331359992e-06 2023-01-23 23:23:55.059738: step: 220/77, loss: 0.000124076206702739 2023-01-23 23:23:56.335109: step: 224/77, loss: 0.050598494708538055 2023-01-23 23:23:57.659154: step: 228/77, loss: 0.000414682348491624 2023-01-23 23:23:58.961547: step: 232/77, loss: 0.00021670106798410416 2023-01-23 23:24:00.236503: step: 236/77, loss: 1.5346533473348245e-05 2023-01-23 23:24:01.508630: step: 240/77, loss: 0.002009189687669277 2023-01-23 23:24:02.815640: step: 244/77, loss: 0.00034373922972008586 2023-01-23 23:24:04.141725: step: 248/77, loss: 5.6421900808345526e-05 2023-01-23 23:24:05.432134: step: 252/77, loss: 4.0775397792458534e-05 2023-01-23 23:24:06.713427: step: 256/77, loss: 0.00833223108202219 2023-01-23 23:24:07.993131: step: 260/77, loss: 0.08339645713567734 2023-01-23 23:24:09.326536: step: 264/77, loss: 0.0011742322240024805 2023-01-23 23:24:10.594618: step: 268/77, loss: 7.463623478543013e-05 2023-01-23 23:24:11.873376: step: 272/77, loss: 0.05141519010066986 2023-01-23 23:24:13.124931: step: 276/77, loss: 3.3183584946527844e-06 2023-01-23 23:24:14.428854: step: 280/77, loss: 0.026045873761177063 2023-01-23 23:24:15.781339: step: 284/77, loss: 0.0007297683041542768 2023-01-23 23:24:17.105306: step: 288/77, loss: 1.0163198567170184e-05 2023-01-23 23:24:18.387595: step: 292/77, loss: 0.00027464088634587824 2023-01-23 23:24:19.701560: step: 296/77, loss: 0.007331944536417723 2023-01-23 23:24:21.032790: step: 300/77, loss: 0.00548921525478363 
2023-01-23 23:24:22.330437: step: 304/77, loss: 0.00010299640416633338 2023-01-23 23:24:23.676805: step: 308/77, loss: 7.229519269458251e-06 2023-01-23 23:24:24.974462: step: 312/77, loss: 0.0007639031973667443 2023-01-23 23:24:26.295891: step: 316/77, loss: 0.00017782395298127085 2023-01-23 23:24:27.609484: step: 320/77, loss: 0.06606744229793549 2023-01-23 23:24:28.904966: step: 324/77, loss: 0.005998269654810429 2023-01-23 23:24:30.214745: step: 328/77, loss: 1.3011542250751518e-05 2023-01-23 23:24:31.503585: step: 332/77, loss: 0.0018053125822916627 2023-01-23 23:24:32.773627: step: 336/77, loss: 0.015008185058832169 2023-01-23 23:24:34.104696: step: 340/77, loss: 0.0002600968873593956 2023-01-23 23:24:35.378482: step: 344/77, loss: 0.012032567523419857 2023-01-23 23:24:36.657498: step: 348/77, loss: 0.009152377024292946 2023-01-23 23:24:37.946828: step: 352/77, loss: 0.0005421206587925553 2023-01-23 23:24:39.248566: step: 356/77, loss: 0.008107601664960384 2023-01-23 23:24:40.529419: step: 360/77, loss: 0.030353447422385216 2023-01-23 23:24:41.843832: step: 364/77, loss: 0.0006560410256497562 2023-01-23 23:24:43.073019: step: 368/77, loss: 5.143606358615216e-06 2023-01-23 23:24:44.400298: step: 372/77, loss: 2.687372762011364e-05 2023-01-23 23:24:45.712193: step: 376/77, loss: 0.002569367177784443 2023-01-23 23:24:47.028416: step: 380/77, loss: 4.8763849918032065e-05 2023-01-23 23:24:48.342221: step: 384/77, loss: 0.005358351860195398 2023-01-23 23:24:49.689946: step: 388/77, loss: 7.558208744740114e-05 ================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Chinese: {'template': {'p': 0.9696969696969697, 'r': 0.48854961832061067, 'f1': 0.6497461928934011}, 'slot': {'p': 0.42857142857142855, 'r': 0.013550135501355014, 'f1': 0.02626970227670753}, 'combined': 0.01706863904273383, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.48091603053435117, 'f1': 0.6428571428571428}, 'slot': {'p': 0.4, 'r': 0.012646793134598013, 'f1': 0.0245183887915937}, 'combined': 0.01576182136602452, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Russian: {'template': {'p': 0.9696969696969697, 'r': 0.48854961832061067, 'f1': 0.6497461928934011}, 'slot': {'p': 0.42857142857142855, 'r': 0.013550135501355014, 'f1': 0.02626970227670753}, 'combined': 0.01706863904273383, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 20} ================================================== Current best result: 
-------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 21 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:26:32.883595: step: 4/77, loss: 1.1914366950804833e-05 2023-01-23 23:26:34.160109: step: 8/77, loss: 0.0006677417550235987 2023-01-23 23:26:35.489342: step: 12/77, loss: 0.0010498034534975886 2023-01-23 23:26:36.810730: step: 16/77, loss: 4.68288162664976e-05 2023-01-23 23:26:38.132181: step: 20/77, loss: 0.006344620604068041 2023-01-23 23:26:39.446530: step: 24/77, loss: 0.0009554250864312053 2023-01-23 23:26:40.718520: step: 28/77, loss: 0.00023494433844462037 2023-01-23 23:26:42.027981: step: 32/77, loss: 0.011859744787216187 2023-01-23 23:26:43.354339: step: 36/77, loss: 0.00040839900611899793 2023-01-23 23:26:44.619800: step: 40/77, loss: 0.0011700581526383758 2023-01-23 23:26:45.885906: step: 44/77, loss: 0.00013149564620107412 2023-01-23 23:26:47.171048: step: 48/77, loss: 0.029546651989221573 2023-01-23 23:26:48.465383: step: 52/77, loss: 0.001863011159002781 2023-01-23 23:26:49.783722: step: 56/77, loss: 0.07704512774944305 2023-01-23 23:26:51.078763: step: 60/77, loss: 0.022442875429987907 2023-01-23 23:26:52.405377: step: 64/77, loss: 0.0035350825637578964 2023-01-23 23:26:53.719316: step: 68/77, loss: 0.0333038829267025 2023-01-23 23:26:55.040674: step: 72/77, loss: 0.008941985666751862 2023-01-23 23:26:56.322143: step: 76/77, loss: 1.3948665582574904e-05 2023-01-23 23:26:57.650171: step: 80/77, loss: 0.00012010518548777327 
2023-01-23 23:26:58.921984: step: 84/77, loss: 0.0011105663143098354 2023-01-23 23:27:00.243119: step: 88/77, loss: 0.00029640455613844097 2023-01-23 23:27:01.552910: step: 92/77, loss: 0.004373971838504076 2023-01-23 23:27:02.882558: step: 96/77, loss: 0.0002705455117393285 2023-01-23 23:27:04.184434: step: 100/77, loss: 0.0009135695872828364 2023-01-23 23:27:05.491304: step: 104/77, loss: 0.00013783590111415833 2023-01-23 23:27:06.794237: step: 108/77, loss: 5.1556075050029904e-05 2023-01-23 23:27:08.071137: step: 112/77, loss: 0.005635536275804043 2023-01-23 23:27:09.320471: step: 116/77, loss: 0.026683399453759193 2023-01-23 23:27:10.638043: step: 120/77, loss: 4.783980330103077e-06 2023-01-23 23:27:11.966422: step: 124/77, loss: 6.202296208357438e-05 2023-01-23 23:27:13.270121: step: 128/77, loss: 3.960451977036428e-06 2023-01-23 23:27:14.522544: step: 132/77, loss: 3.1410495466843713e-06 2023-01-23 23:27:15.851114: step: 136/77, loss: 2.6866157440963434e-06 2023-01-23 23:27:17.172381: step: 140/77, loss: 0.027216006070375443 2023-01-23 23:27:18.470224: step: 144/77, loss: 0.001939344103448093 2023-01-23 23:27:19.833779: step: 148/77, loss: 0.00010330761870136485 2023-01-23 23:27:21.112017: step: 152/77, loss: 0.03519413247704506 2023-01-23 23:27:22.389160: step: 156/77, loss: 0.01529156044125557 2023-01-23 23:27:23.684974: step: 160/77, loss: 0.00013548173592425883 2023-01-23 23:27:24.945298: step: 164/77, loss: 0.00017336659948341548 2023-01-23 23:27:26.234627: step: 168/77, loss: 1.773536132532172e-05 2023-01-23 23:27:27.546334: step: 172/77, loss: 0.0007969909347593784 2023-01-23 23:27:28.839689: step: 176/77, loss: 0.000521956360898912 2023-01-23 23:27:30.104336: step: 180/77, loss: 0.0014356840401887894 2023-01-23 23:27:31.433557: step: 184/77, loss: 0.010686034336686134 2023-01-23 23:27:32.723532: step: 188/77, loss: 3.401363574084826e-05 2023-01-23 23:27:34.060937: step: 192/77, loss: 0.0011438775109127164 2023-01-23 23:27:35.333021: step: 196/77, loss: 0.0010469158878549933 2023-01-23 23:27:36.646321: step: 200/77, loss: 6.212064909050241e-05 2023-01-23 23:27:38.019236: step: 204/77, loss: 0.0015256913611665368 2023-01-23 23:27:39.312245: step: 208/77, loss: 0.05329586938023567 2023-01-23 23:27:40.600505: step: 212/77, loss: 2.653541559993755e-05 2023-01-23 23:27:41.923967: step: 216/77, loss: 7.547883797087707e-06 2023-01-23 23:27:43.214344: step: 220/77, loss: 0.0016823242185637355 2023-01-23 23:27:44.509399: step: 224/77, loss: 0.00011696373258018866 2023-01-23 23:27:45.796379: step: 228/77, loss: 4.268263000994921e-05 2023-01-23 23:27:47.090285: step: 232/77, loss: 0.0003383099683560431 2023-01-23 23:27:48.402291: step: 236/77, loss: 0.022379782050848007 2023-01-23 23:27:49.776061: step: 240/77, loss: 0.0016691423952579498 2023-01-23 23:27:51.053935: step: 244/77, loss: 2.1457188267959282e-05 2023-01-23 23:27:52.384986: step: 248/77, loss: 0.001095449784770608 2023-01-23 23:27:53.765171: step: 252/77, loss: 0.00016877110465429723 2023-01-23 23:27:55.064733: step: 256/77, loss: 0.0005651089013554156 2023-01-23 23:27:56.374813: step: 260/77, loss: 0.02748904563486576 2023-01-23 23:27:57.702509: step: 264/77, loss: 0.0003405744209885597 2023-01-23 23:27:58.991519: step: 268/77, loss: 1.212671213579597e-05 2023-01-23 23:28:00.323660: step: 272/77, loss: 0.0006532514235004783 2023-01-23 23:28:01.640627: step: 276/77, loss: 0.008219941519200802 2023-01-23 23:28:02.935220: step: 280/77, loss: 0.00036534247919917107 2023-01-23 23:28:04.244528: step: 284/77, loss: 
0.00047409304534085095 2023-01-23 23:28:05.535923: step: 288/77, loss: 9.568202949594706e-05 2023-01-23 23:28:06.825393: step: 292/77, loss: 1.4007066795329592e-07 2023-01-23 23:28:08.118217: step: 296/77, loss: 0.025079643353819847 2023-01-23 23:28:09.432311: step: 300/77, loss: 7.805313543940429e-06 2023-01-23 23:28:10.748849: step: 304/77, loss: 4.479253038880415e-05 2023-01-23 23:28:12.088978: step: 308/77, loss: 0.001709498930722475 2023-01-23 23:28:13.346380: step: 312/77, loss: 0.00020420948567334563 2023-01-23 23:28:14.628385: step: 316/77, loss: 1.8438247934682295e-05 2023-01-23 23:28:15.990050: step: 320/77, loss: 0.12470944225788116 2023-01-23 23:28:17.268382: step: 324/77, loss: 0.0016903901705518365 2023-01-23 23:28:18.559535: step: 328/77, loss: 0.00316980411298573 2023-01-23 23:28:19.829307: step: 332/77, loss: 0.0025733783841133118 2023-01-23 23:28:21.157906: step: 336/77, loss: 0.00011846191773656756 2023-01-23 23:28:22.463878: step: 340/77, loss: 0.0011499988613650203 2023-01-23 23:28:23.749142: step: 344/77, loss: 1.996714672714006e-06 2023-01-23 23:28:25.051758: step: 348/77, loss: 8.102708670776337e-05 2023-01-23 23:28:26.342164: step: 352/77, loss: 0.00878163706511259 2023-01-23 23:28:27.631531: step: 356/77, loss: 0.0012174914591014385 2023-01-23 23:28:28.915696: step: 360/77, loss: 4.949744834448211e-06 2023-01-23 23:28:30.152363: step: 364/77, loss: 0.0038780434988439083 2023-01-23 23:28:31.451916: step: 368/77, loss: 0.012070821598172188 2023-01-23 23:28:32.738466: step: 372/77, loss: 0.0018070744117721915 2023-01-23 23:28:34.017283: step: 376/77, loss: 7.179051863204222e-06 2023-01-23 23:28:35.309755: step: 380/77, loss: 0.00012617415632121265 2023-01-23 23:28:36.542386: step: 384/77, loss: 0.004800946451723576 2023-01-23 23:28:37.798473: step: 388/77, loss: 0.014380555599927902 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Chinese: {'template': {'p': 0.925, 'r': 0.5648854961832062, 'f1': 0.7014218009478673}, 'slot': {'p': 0.375, 'r': 0.016260162601626018, 'f1': 0.031168831168831172}, 'combined': 0.02186249769188158, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Korean: {'template': {'p': 0.9240506329113924, 'r': 0.5572519083969466, 'f1': 0.6952380952380953}, 'slot': {'p': 0.375, 'r': 0.016260162601626018, 'f1': 0.031168831168831172}, 'combined': 0.02166975881261596, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Russian: {'template': {'p': 0.9240506329113924, 'r': 0.5572519083969466, 'f1': 0.6952380952380953}, 'slot': {'p': 0.36, 'r': 0.016260162601626018, 'f1': 0.031114952463267072}, 'combined': 0.021632300283985682, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} Sample 
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 22 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:30:21.127277: step: 4/77, loss: 0.0004149131418671459 2023-01-23 23:30:22.408753: step: 8/77, loss: 0.049201954156160355 2023-01-23 23:30:23.733175: step: 12/77, loss: 0.02350142039358616 2023-01-23 23:30:25.006691: step: 16/77, loss: 0.0002929410256911069 2023-01-23 23:30:26.342541: step: 20/77, loss: 0.000536171777639538 2023-01-23 23:30:27.641647: step: 24/77, loss: 0.1399349868297577 2023-01-23 23:30:28.915865: step: 28/77, loss: 0.020580261945724487 2023-01-23 23:30:30.184481: step: 32/77, loss: 0.0659652128815651 2023-01-23 23:30:31.474685: step: 36/77, loss: 0.030944466590881348 2023-01-23 23:30:32.738391: step: 40/77, loss: 4.1995273932116106e-05 2023-01-23 23:30:34.037180: step: 44/77, loss: 0.0005336483009159565 2023-01-23 23:30:35.298340: step: 48/77, loss: 0.00249558687210083 2023-01-23 23:30:36.646610: step: 52/77, loss: 0.011043079197406769 2023-01-23 23:30:37.951292: step: 56/77, loss: 0.002647034591063857 2023-01-23 23:30:39.269204: step: 60/77, loss: 0.07114987820386887 2023-01-23 23:30:40.548970: step: 64/77, loss: 0.0004703971208073199 2023-01-23 23:30:41.812430: 
step: 68/77, loss: 0.11193948984146118 2023-01-23 23:30:43.089400: step: 72/77, loss: 0.006072756368666887 2023-01-23 23:30:44.369888: step: 76/77, loss: 0.004863828886300325 2023-01-23 23:30:45.688892: step: 80/77, loss: 0.0003708004660438746 2023-01-23 23:30:46.995441: step: 84/77, loss: 0.034591663628816605 2023-01-23 23:30:48.283078: step: 88/77, loss: 0.0032617237884551287 2023-01-23 23:30:49.550883: step: 92/77, loss: 0.0001561310637043789 2023-01-23 23:30:50.842696: step: 96/77, loss: 2.1224195734248497e-05 2023-01-23 23:30:52.105573: step: 100/77, loss: 0.0001106334530049935 2023-01-23 23:30:53.448745: step: 104/77, loss: 5.8119076129514724e-05 2023-01-23 23:30:54.732936: step: 108/77, loss: 0.00011468568118289113 2023-01-23 23:30:56.030254: step: 112/77, loss: 1.173296641354682e-05 2023-01-23 23:30:57.322300: step: 116/77, loss: 0.03556504100561142 2023-01-23 23:30:58.603480: step: 120/77, loss: 0.0009185223607346416 2023-01-23 23:30:59.911290: step: 124/77, loss: 0.0006989326211623847 2023-01-23 23:31:01.177754: step: 128/77, loss: 0.0005934851942583919 2023-01-23 23:31:02.515215: step: 132/77, loss: 0.00019201381655875593 2023-01-23 23:31:03.800624: step: 136/77, loss: 0.0001522630627732724 2023-01-23 23:31:05.039398: step: 140/77, loss: 0.00010733659291872755 2023-01-23 23:31:06.295945: step: 144/77, loss: 0.00011833933967864141 2023-01-23 23:31:07.567326: step: 148/77, loss: 0.0002826797135639936 2023-01-23 23:31:08.863125: step: 152/77, loss: 2.389593646512367e-05 2023-01-23 23:31:10.156880: step: 156/77, loss: 0.03188958019018173 2023-01-23 23:31:11.491561: step: 160/77, loss: 7.080078648868948e-05 2023-01-23 23:31:12.770988: step: 164/77, loss: 0.00020094559295102954 2023-01-23 23:31:14.029486: step: 168/77, loss: 4.374063792056404e-05 2023-01-23 23:31:15.309954: step: 172/77, loss: 1.4840891253697919e-06 2023-01-23 23:31:16.560629: step: 176/77, loss: 0.033406343311071396 2023-01-23 23:31:17.831647: step: 180/77, loss: 5.9410504036350176e-05 2023-01-23 23:31:19.088068: step: 184/77, loss: 5.215402509861633e-08 2023-01-23 23:31:20.410000: step: 188/77, loss: 0.00017796778411138803 2023-01-23 23:31:21.740103: step: 192/77, loss: 5.456154212879483e-06 2023-01-23 23:31:23.060862: step: 196/77, loss: 0.0021971790120005608 2023-01-23 23:31:24.348627: step: 200/77, loss: 0.0001755795383360237 2023-01-23 23:31:25.654720: step: 204/77, loss: 0.00436083460226655 2023-01-23 23:31:26.976682: step: 208/77, loss: 7.003449127296335e-07 2023-01-23 23:31:28.217774: step: 212/77, loss: 1.8607093807077035e-05 2023-01-23 23:31:29.554941: step: 216/77, loss: 0.006038544233888388 2023-01-23 23:31:30.868421: step: 220/77, loss: 2.2351736461700966e-08 2023-01-23 23:31:32.210483: step: 224/77, loss: 1.1814416211564094e-05 2023-01-23 23:31:33.524263: step: 228/77, loss: 0.00021583585476037115 2023-01-23 23:31:34.838500: step: 232/77, loss: 0.0002107950858771801 2023-01-23 23:31:36.117603: step: 236/77, loss: 2.4991000827867538e-05 2023-01-23 23:31:37.401779: step: 240/77, loss: 0.00030950052314437926 2023-01-23 23:31:38.717724: step: 244/77, loss: 0.005680656060576439 2023-01-23 23:31:40.053603: step: 248/77, loss: 2.7505677735462086e-06 2023-01-23 23:31:41.289592: step: 252/77, loss: 0.0005136749241501093 2023-01-23 23:31:42.591294: step: 256/77, loss: 4.056877878610976e-05 2023-01-23 23:31:43.893675: step: 260/77, loss: 0.0026249089278280735 2023-01-23 23:31:45.234023: step: 264/77, loss: 0.0010923325316980481 2023-01-23 23:31:46.555432: step: 268/77, loss: 1.0708590707508847e-05 2023-01-23 
23:31:47.843561: step: 272/77, loss: 6.018809017405147e-06 2023-01-23 23:31:49.182428: step: 276/77, loss: 0.04078038036823273 2023-01-23 23:31:50.477721: step: 280/77, loss: 9.682937525212765e-05 2023-01-23 23:31:51.766295: step: 284/77, loss: 0.0004811729013454169 2023-01-23 23:31:53.065103: step: 288/77, loss: 1.8589149476611055e-05 2023-01-23 23:31:54.372950: step: 292/77, loss: 0.020074518397450447 2023-01-23 23:31:55.693968: step: 296/77, loss: 3.901428863173351e-05 2023-01-23 23:31:56.975795: step: 300/77, loss: 0.032694969326257706 2023-01-23 23:31:58.283779: step: 304/77, loss: 0.00020149351621512324 2023-01-23 23:31:59.576339: step: 308/77, loss: 0.03729041665792465 2023-01-23 23:32:00.891239: step: 312/77, loss: 7.061958604026586e-05 2023-01-23 23:32:02.200208: step: 316/77, loss: 0.002287800656631589 2023-01-23 23:32:03.526233: step: 320/77, loss: 0.00011647411156445742 2023-01-23 23:32:04.851261: step: 324/77, loss: 0.005575717426836491 2023-01-23 23:32:06.138977: step: 328/77, loss: 5.3803167247679085e-05 2023-01-23 23:32:07.444913: step: 332/77, loss: 0.032028112560510635 2023-01-23 23:32:08.725468: step: 336/77, loss: 0.025905797258019447 2023-01-23 23:32:10.063218: step: 340/77, loss: 0.0010144426487386227 2023-01-23 23:32:11.374519: step: 344/77, loss: 0.0006542807095684111 2023-01-23 23:32:12.707484: step: 348/77, loss: 3.084996569668874e-05 2023-01-23 23:32:14.021068: step: 352/77, loss: 0.050253357738256454 2023-01-23 23:32:15.371322: step: 356/77, loss: 0.00022352926316671073 2023-01-23 23:32:16.664276: step: 360/77, loss: 0.002475632121786475 2023-01-23 23:32:17.956911: step: 364/77, loss: 0.00017036257486324757 2023-01-23 23:32:19.269542: step: 368/77, loss: 2.4630685402371455e-06 2023-01-23 23:32:20.510427: step: 372/77, loss: 1.9371481130292523e-07 2023-01-23 23:32:21.809979: step: 376/77, loss: 4.898713086731732e-06 2023-01-23 23:32:23.138908: step: 380/77, loss: 2.603577740956098e-05 2023-01-23 23:32:24.443723: step: 384/77, loss: 0.055696628987789154 2023-01-23 23:32:25.739965: step: 388/77, loss: 3.929721424356103e-05 ================================================== Loss: 0.010 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Chinese: {'template': {'p': 0.984375, 'r': 0.48091603053435117, 'f1': 0.6461538461538462}, 'slot': {'p': 0.3793103448275862, 'r': 0.00993676603432701, 'f1': 0.01936619718309859}, 'combined': 0.012513542795232936, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Korean: {'template': {'p': 0.96875, 'r': 0.4732824427480916, 'f1': 0.6358974358974359}, 'slot': {'p': 0.3793103448275862, 'r': 0.00993676603432701, 'f1': 0.01936619718309859}, 'combined': 0.012314915131816538, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Russian: {'template': {'p': 0.9696969696969697, 'r': 0.48854961832061067, 'f1': 0.6497461928934011}, 'slot': {'p': 0.3793103448275862, 'r': 0.00993676603432701, 'f1': 0.01936619718309859}, 'combined': 0.012583112890541218, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 
0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 23 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:34:08.617121: step: 4/77, loss: 0.012566723860800266 2023-01-23 23:34:09.908584: step: 8/77, loss: 8.478707513859263e-07 2023-01-23 23:34:11.222717: step: 12/77, loss: 0.005080666858702898 2023-01-23 23:34:12.544942: step: 16/77, loss: 0.0034914321731776 2023-01-23 23:34:13.824849: step: 20/77, loss: 0.0004792654071934521 2023-01-23 23:34:15.154816: step: 24/77, loss: 0.00010682813444873318 2023-01-23 23:34:16.424579: step: 28/77, loss: 7.045550592010841e-05 2023-01-23 23:34:17.702625: step: 32/77, loss: 8.381021325476468e-05 2023-01-23 23:34:18.965392: step: 36/77, loss: 0.009439518675208092 2023-01-23 23:34:20.247161: step: 40/77, loss: 0.005691776983439922 2023-01-23 23:34:21.557122: step: 44/77, loss: 0.0011711755068972707 2023-01-23 23:34:22.886143: step: 48/77, loss: 0.003150845179334283 2023-01-23 23:34:24.177321: step: 52/77, loss: 0.00031852780375629663 2023-01-23 
23:34:25.430650: step: 56/77, loss: 0.0007815890712663531 2023-01-23 23:34:26.735904: step: 60/77, loss: 0.0007130279554985464 2023-01-23 23:34:28.081107: step: 64/77, loss: 0.0240264181047678 2023-01-23 23:34:29.386520: step: 68/77, loss: 0.00012617622269317508 2023-01-23 23:34:30.658554: step: 72/77, loss: 0.018813449889421463 2023-01-23 23:34:31.959125: step: 76/77, loss: 0.004636474419385195 2023-01-23 23:34:33.246767: step: 80/77, loss: 0.00027595015126280487 2023-01-23 23:34:34.563989: step: 84/77, loss: 6.334046702249907e-06 2023-01-23 23:34:35.862093: step: 88/77, loss: 0.029080556705594063 2023-01-23 23:34:37.144821: step: 92/77, loss: 0.018633205443620682 2023-01-23 23:34:38.431505: step: 96/77, loss: 0.02069343626499176 2023-01-23 23:34:39.755414: step: 100/77, loss: 0.0008100596605800092 2023-01-23 23:34:41.061835: step: 104/77, loss: 7.562098289781716e-06 2023-01-23 23:34:42.290152: step: 108/77, loss: 3.5552866393118165e-06 2023-01-23 23:34:43.547442: step: 112/77, loss: 0.0007956930203363299 2023-01-23 23:34:44.808807: step: 116/77, loss: 1.4057108273846097e-05 2023-01-23 23:34:46.143771: step: 120/77, loss: 0.00010456127347424626 2023-01-23 23:34:47.453892: step: 124/77, loss: 7.063026146170159e-07 2023-01-23 23:34:48.737860: step: 128/77, loss: 0.00023995916126295924 2023-01-23 23:34:50.054105: step: 132/77, loss: 0.02845638059079647 2023-01-23 23:34:51.292746: step: 136/77, loss: 0.030174342915415764 2023-01-23 23:34:52.598545: step: 140/77, loss: 0.0004656286328099668 2023-01-23 23:34:53.978599: step: 144/77, loss: 0.00016981828957796097 2023-01-23 23:34:55.311536: step: 148/77, loss: 2.4861627025529742e-05 2023-01-23 23:34:56.576550: step: 152/77, loss: 4.1571697693143506e-06 2023-01-23 23:34:57.844693: step: 156/77, loss: 0.0012472313828766346 2023-01-23 23:34:59.106985: step: 160/77, loss: 0.0007646206067875028 2023-01-23 23:35:00.376994: step: 164/77, loss: 3.660557194962166e-05 2023-01-23 23:35:01.743639: step: 168/77, loss: 1.2873996411144617e-06 2023-01-23 23:35:03.083817: step: 172/77, loss: 5.140098437550478e-06 2023-01-23 23:35:04.378083: step: 176/77, loss: 5.215400022962058e-08 2023-01-23 23:35:05.620405: step: 180/77, loss: 0.007089770864695311 2023-01-23 23:35:06.874165: step: 184/77, loss: 6.2692542996956035e-06 2023-01-23 23:35:08.174159: step: 188/77, loss: 0.001343978801742196 2023-01-23 23:35:09.484653: step: 192/77, loss: 0.05258629098534584 2023-01-23 23:35:10.793854: step: 196/77, loss: 0.0030400222167372704 2023-01-23 23:35:12.064361: step: 200/77, loss: 0.02726101316511631 2023-01-23 23:35:13.350076: step: 204/77, loss: 0.06454948335886002 2023-01-23 23:35:14.660965: step: 208/77, loss: 5.5959285418794025e-06 2023-01-23 23:35:15.962075: step: 212/77, loss: 0.0009825187735259533 2023-01-23 23:35:17.244947: step: 216/77, loss: 4.3568870751187205e-06 2023-01-23 23:35:18.555520: step: 220/77, loss: 0.0007414943538606167 2023-01-23 23:35:19.832293: step: 224/77, loss: 1.1882290891662706e-05 2023-01-23 23:35:21.139263: step: 228/77, loss: 0.016352159902453423 2023-01-23 23:35:22.469030: step: 232/77, loss: 0.00029403012013062835 2023-01-23 23:35:23.822662: step: 236/77, loss: 0.013957513496279716 2023-01-23 23:35:25.164499: step: 240/77, loss: 0.0008857838693074882 2023-01-23 23:35:26.489812: step: 244/77, loss: 2.8222179025760852e-05 2023-01-23 23:35:27.788469: step: 248/77, loss: 0.00010344553447794169 2023-01-23 23:35:29.119597: step: 252/77, loss: 2.9653216415681527e-07 2023-01-23 23:35:30.386374: step: 256/77, loss: 1.1920915454766146e-07 
2023-01-23 23:35:31.670764: step: 260/77, loss: 0.007125942036509514 2023-01-23 23:35:32.961281: step: 264/77, loss: 5.081273002360831e-07 2023-01-23 23:35:34.290167: step: 268/77, loss: 0.029344888404011726 2023-01-23 23:35:35.565706: step: 272/77, loss: 2.980204101277195e-07 2023-01-23 23:35:36.834535: step: 276/77, loss: 0.001337407506071031 2023-01-23 23:35:38.135382: step: 280/77, loss: 0.0013053520815446973 2023-01-23 23:35:39.443691: step: 284/77, loss: 0.00030600311583839357 2023-01-23 23:35:40.720012: step: 288/77, loss: 0.004457205068320036 2023-01-23 23:35:41.979031: step: 292/77, loss: 5.743621386500308e-06 2023-01-23 23:35:43.296303: step: 296/77, loss: 5.672270162904169e-06 2023-01-23 23:35:44.643848: step: 300/77, loss: 8.01328060333617e-05 2023-01-23 23:35:45.939416: step: 304/77, loss: 0.0063082557171583176 2023-01-23 23:35:47.208878: step: 308/77, loss: 0.0002656308060977608 2023-01-23 23:35:48.486639: step: 312/77, loss: 0.0001685560418991372 2023-01-23 23:35:49.832733: step: 316/77, loss: 5.051433618064038e-07 2023-01-23 23:35:51.181339: step: 320/77, loss: 0.0030908186454325914 2023-01-23 23:35:52.454125: step: 324/77, loss: 1.1965148587478325e-06 2023-01-23 23:35:53.775826: step: 328/77, loss: 0.00015451980289071798 2023-01-23 23:35:55.108188: step: 332/77, loss: 0.000776467437390238 2023-01-23 23:35:56.438832: step: 336/77, loss: 8.403077663388103e-05 2023-01-23 23:35:57.734948: step: 340/77, loss: 0.05382693558931351 2023-01-23 23:35:59.004943: step: 344/77, loss: 0.00024840643163770437 2023-01-23 23:36:00.346178: step: 348/77, loss: 0.0008327533723786473 2023-01-23 23:36:01.675023: step: 352/77, loss: 2.0257557480363175e-05 2023-01-23 23:36:02.973247: step: 356/77, loss: 1.4881431525282096e-05 2023-01-23 23:36:04.280755: step: 360/77, loss: 6.568313438037876e-06 2023-01-23 23:36:05.554291: step: 364/77, loss: 7.910434942459688e-05 2023-01-23 23:36:06.818512: step: 368/77, loss: 0.0018795005744323134 2023-01-23 23:36:08.117351: step: 372/77, loss: 1.0481529898243025e-05 2023-01-23 23:36:09.447156: step: 376/77, loss: 8.545060154574458e-06 2023-01-23 23:36:10.770463: step: 380/77, loss: 0.19529400765895844 2023-01-23 23:36:12.050917: step: 384/77, loss: 1.5300051018130034e-05 2023-01-23 23:36:13.350446: step: 388/77, loss: 2.5652367185102776e-05 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Chinese: {'template': {'p': 0.9714285714285714, 'r': 0.5190839694656488, 'f1': 0.6766169154228854}, 'slot': {'p': 0.48, 'r': 0.01084010840108401, 'f1': 0.02120141342756184}, 'combined': 0.014345234955962235, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Korean: {'template': {'p': 0.9577464788732394, 'r': 0.5190839694656488, 'f1': 0.6732673267326732}, 'slot': {'p': 0.4583333333333333, 'r': 0.00993676603432701, 'f1': 0.01945181255526083}, 'combined': 0.013096269839185508, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Russian: {'template': {'p': 0.9583333333333334, 'r': 
0.5267175572519084, 'f1': 0.6798029556650246}, 'slot': {'p': 0.48, 'r': 0.01084010840108401, 'f1': 0.02120141342756184}, 'combined': 0.014412783512332678, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 24 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:37:56.106110: step: 4/77, loss: 7.27365659258794e-06 2023-01-23 23:37:57.425613: step: 8/77, loss: 1.238247136825521e-06 2023-01-23 23:37:58.698949: step: 12/77, loss: 3.6491248465608805e-05 2023-01-23 23:38:00.002549: step: 16/77, loss: 1.490007525717374e-05 2023-01-23 23:38:01.256206: step: 20/77, loss: 1.8179339633661584e-07 2023-01-23 23:38:02.557300: step: 24/77, loss: 2.4972685423563235e-05 2023-01-23 23:38:03.802228: step: 28/77, loss: 2.0712549542167835e-07 2023-01-23 23:38:05.054786: step: 32/77, loss: 0.013989298604428768 2023-01-23 23:38:06.402473: step: 36/77, loss: 
1.8670249346541823e-06 2023-01-23 23:38:07.684347: step: 40/77, loss: 1.4274588693297119e-06 2023-01-23 23:38:09.037258: step: 44/77, loss: 0.0037133987061679363 2023-01-23 23:38:10.385111: step: 48/77, loss: 0.08478113263845444 2023-01-23 23:38:11.655805: step: 52/77, loss: 0.0002885919820982963 2023-01-23 23:38:12.935388: step: 56/77, loss: 0.039048902690410614 2023-01-23 23:38:14.248361: step: 60/77, loss: 0.00010248593025607988 2023-01-23 23:38:15.564122: step: 64/77, loss: 7.2267857831320725e-06 2023-01-23 23:38:16.850005: step: 68/77, loss: 0.022682351991534233 2023-01-23 23:38:18.130758: step: 72/77, loss: 8.319793414557353e-05 2023-01-23 23:38:19.460628: step: 76/77, loss: 0.11278786510229111 2023-01-23 23:38:20.742402: step: 80/77, loss: 0.00027442857390269637 2023-01-23 23:38:21.976504: step: 84/77, loss: 0.003319286974146962 2023-01-23 23:38:23.293288: step: 88/77, loss: 0.0001222712453454733 2023-01-23 23:38:24.638729: step: 92/77, loss: 0.01310756802558899 2023-01-23 23:38:25.920890: step: 96/77, loss: 7.5497505349630956e-06 2023-01-23 23:38:27.192210: step: 100/77, loss: 0.06587717682123184 2023-01-23 23:38:28.449868: step: 104/77, loss: 0.0018411398632451892 2023-01-23 23:38:29.794505: step: 108/77, loss: 0.0669705793261528 2023-01-23 23:38:31.101126: step: 112/77, loss: 6.61185276840115e-06 2023-01-23 23:38:32.346815: step: 116/77, loss: 0.017078397795557976 2023-01-23 23:38:33.627210: step: 120/77, loss: 0.0007926687248982489 2023-01-23 23:38:34.912246: step: 124/77, loss: 7.0508122007595375e-06 2023-01-23 23:38:36.227352: step: 128/77, loss: 0.016395514830946922 2023-01-23 23:38:37.491478: step: 132/77, loss: 0.00024466862669214606 2023-01-23 23:38:38.775373: step: 136/77, loss: 2.8732954888255335e-05 2023-01-23 23:38:40.072852: step: 140/77, loss: 2.6793524739332497e-05 2023-01-23 23:38:41.394172: step: 144/77, loss: 0.00013426571968011558 2023-01-23 23:38:42.736294: step: 148/77, loss: 0.007598715368658304 2023-01-23 23:38:43.995334: step: 152/77, loss: 3.5730990930460393e-05 2023-01-23 23:38:45.350227: step: 156/77, loss: 0.021153738722205162 2023-01-23 23:38:46.638558: step: 160/77, loss: 3.308024076886795e-07 2023-01-23 23:38:47.897857: step: 164/77, loss: 1.2218577012390597e-06 2023-01-23 23:38:49.253696: step: 168/77, loss: 2.4686836695764214e-05 2023-01-23 23:38:50.568967: step: 172/77, loss: 2.7910971766687e-05 2023-01-23 23:38:51.875928: step: 176/77, loss: 1.3962063576400396e-06 2023-01-23 23:38:53.154427: step: 180/77, loss: 0.00042038323590531945 2023-01-23 23:38:54.473402: step: 184/77, loss: 7.789325536577962e-06 2023-01-23 23:38:55.815539: step: 188/77, loss: 0.0001271903602173552 2023-01-23 23:38:57.161400: step: 192/77, loss: 0.002623402513563633 2023-01-23 23:38:58.420745: step: 196/77, loss: 0.004340842831879854 2023-01-23 23:38:59.780429: step: 200/77, loss: 2.1268249838612974e-05 2023-01-23 23:39:01.092324: step: 204/77, loss: 3.217386256437749e-05 2023-01-23 23:39:02.389676: step: 208/77, loss: 5.2126033551758155e-05 2023-01-23 23:39:03.750627: step: 212/77, loss: 2.491239683877211e-06 2023-01-23 23:39:05.043600: step: 216/77, loss: 0.00140212825499475 2023-01-23 23:39:06.312005: step: 220/77, loss: 0.02959577552974224 2023-01-23 23:39:07.639979: step: 224/77, loss: 2.7585945645114407e-05 2023-01-23 23:39:08.976029: step: 228/77, loss: 0.02623019367456436 2023-01-23 23:39:10.255065: step: 232/77, loss: 0.00029231017106212676 2023-01-23 23:39:11.552524: step: 236/77, loss: 0.00011233628174522892 2023-01-23 23:39:12.832607: step: 240/77, loss: 
2.836989324350725e-06 2023-01-23 23:39:14.089094: step: 244/77, loss: 4.0828840042195225e-07 2023-01-23 23:39:15.344186: step: 248/77, loss: 5.672002316714497e-06 2023-01-23 23:39:16.578360: step: 252/77, loss: 1.2763992344844155e-05 2023-01-23 23:39:17.832927: step: 256/77, loss: 3.2168070447369246e-06 2023-01-23 23:39:19.118267: step: 260/77, loss: 0.004356747958809137 2023-01-23 23:39:20.435066: step: 264/77, loss: 1.299349150940543e-06 2023-01-23 23:39:21.684058: step: 268/77, loss: 0.0002825258707161993 2023-01-23 23:39:23.016559: step: 272/77, loss: 1.18460711746593e-05 2023-01-23 23:39:24.323957: step: 276/77, loss: 5.490722742251819e-06 2023-01-23 23:39:25.656125: step: 280/77, loss: 0.01454936247318983 2023-01-23 23:39:26.965435: step: 284/77, loss: 0.048553213477134705 2023-01-23 23:39:28.251121: step: 288/77, loss: 1.5159343092818744e-05 2023-01-23 23:39:29.535562: step: 292/77, loss: 0.00015713486936874688 2023-01-23 23:39:30.799868: step: 296/77, loss: 0.0001880024210549891 2023-01-23 23:39:32.110027: step: 300/77, loss: 4.002780406153761e-05 2023-01-23 23:39:33.338797: step: 304/77, loss: 0.0004623873101081699 2023-01-23 23:39:34.670413: step: 308/77, loss: 4.9771777412388474e-05 2023-01-23 23:39:35.966241: step: 312/77, loss: 0.0005654781707562506 2023-01-23 23:39:37.256823: step: 316/77, loss: 1.0839068636414595e-05 2023-01-23 23:39:38.542355: step: 320/77, loss: 0.00010301794100087136 2023-01-23 23:39:39.797755: step: 324/77, loss: 1.5710642401245423e-05 2023-01-23 23:39:41.117592: step: 328/77, loss: 0.04304623603820801 2023-01-23 23:39:42.365384: step: 332/77, loss: 3.624851888162084e-05 2023-01-23 23:39:43.659144: step: 336/77, loss: 0.00030866515589877963 2023-01-23 23:39:44.973151: step: 340/77, loss: 6.1949635892233346e-06 2023-01-23 23:39:46.272819: step: 344/77, loss: 4.167402221355587e-05 2023-01-23 23:39:47.564360: step: 348/77, loss: 0.04800989478826523 2023-01-23 23:39:48.842718: step: 352/77, loss: 8.210468536162807e-07 2023-01-23 23:39:50.158159: step: 356/77, loss: 1.8006705431616865e-05 2023-01-23 23:39:51.455895: step: 360/77, loss: 0.0002485654258634895 2023-01-23 23:39:52.798621: step: 364/77, loss: 0.006535383872687817 2023-01-23 23:39:54.106813: step: 368/77, loss: 0.00163769640494138 2023-01-23 23:39:55.419029: step: 372/77, loss: 0.0006401842692866921 2023-01-23 23:39:56.765143: step: 376/77, loss: 1.332126657871413e-06 2023-01-23 23:39:58.087710: step: 380/77, loss: 0.00102709059137851 2023-01-23 23:39:59.385971: step: 384/77, loss: 1.3253316865302622e-05 2023-01-23 23:40:00.742442: step: 388/77, loss: 0.001358823268674314 ================================================== Loss: 0.008 -------------------- Dev Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 24} Test Chinese: {'template': {'p': 0.9866666666666667, 'r': 0.5648854961832062, 'f1': 0.7184466019417477}, 'slot': {'p': 0.45454545454545453, 'r': 0.013550135501355014, 'f1': 0.02631578947368421}, 'combined': 0.018906489524782834, 'epoch': 24} Dev Korean: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 24} Test Korean: {'template': {'p': 0.9864864864864865, 'r': 0.5572519083969466, 'f1': 0.7121951219512196}, 'slot': {'p': 0.45454545454545453, 'r': 0.013550135501355014, 'f1': 
0.02631578947368421}, 'combined': 0.018741976893453145, 'epoch': 24} Dev Russian: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 24} Test Russian: {'template': {'p': 0.9866666666666667, 'r': 0.5648854961832062, 'f1': 0.7184466019417477}, 'slot': {'p': 0.4411764705882353, 'r': 0.013550135501355014, 'f1': 0.02629272567922875}, 'combined': 0.018889919420028423, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 24} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 25 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:41:43.803003: step: 4/77, loss: 0.01842794567346573 2023-01-23 23:41:45.105733: step: 8/77, loss: 0.0069998279213905334 2023-01-23 23:41:46.450620: step: 12/77, loss: 8.705254913365934e-06 2023-01-23 23:41:47.723307: step: 16/77, 
loss: 1.9711498680408113e-05 2023-01-23 23:41:49.015557: step: 20/77, loss: 0.0018902001902461052 2023-01-23 23:41:50.324743: step: 24/77, loss: 9.889909415505826e-05 2023-01-23 23:41:51.570463: step: 28/77, loss: 0.010225817561149597 2023-01-23 23:41:52.827716: step: 32/77, loss: 0.00024162388581316918 2023-01-23 23:41:54.139753: step: 36/77, loss: 4.7459459892706946e-05 2023-01-23 23:41:55.423668: step: 40/77, loss: 0.00014923862181603909 2023-01-23 23:41:56.672492: step: 44/77, loss: 2.682207522752833e-08 2023-01-23 23:41:57.888696: step: 48/77, loss: 0.0014505910221487284 2023-01-23 23:41:59.182479: step: 52/77, loss: 0.0025084619410336018 2023-01-23 23:42:00.510885: step: 56/77, loss: 0.009419834241271019 2023-01-23 23:42:01.803109: step: 60/77, loss: 3.4378666896373034e-05 2023-01-23 23:42:03.053759: step: 64/77, loss: 7.156289939302951e-05 2023-01-23 23:42:04.353365: step: 68/77, loss: 5.87022805120796e-06 2023-01-23 23:42:05.682725: step: 72/77, loss: 5.6899873015936464e-05 2023-01-23 23:42:06.996882: step: 76/77, loss: 0.009436637163162231 2023-01-23 23:42:08.297609: step: 80/77, loss: 2.0404635506565683e-05 2023-01-23 23:42:09.591287: step: 84/77, loss: 0.003238705452531576 2023-01-23 23:42:10.922629: step: 88/77, loss: 0.00015356639050878584 2023-01-23 23:42:12.229129: step: 92/77, loss: 4.204708602628671e-06 2023-01-23 23:42:13.499262: step: 96/77, loss: 3.562411438906565e-05 2023-01-23 23:42:14.811464: step: 100/77, loss: 4.783595068147406e-05 2023-01-23 23:42:16.106722: step: 104/77, loss: 0.0005482888664118946 2023-01-23 23:42:17.405975: step: 108/77, loss: 6.809772798987979e-07 2023-01-23 23:42:18.706692: step: 112/77, loss: 1.9110122593701817e-05 2023-01-23 23:42:20.005119: step: 116/77, loss: 6.269341611186974e-06 2023-01-23 23:42:21.328632: step: 120/77, loss: 0.00022487477690447122 2023-01-23 23:42:22.681078: step: 124/77, loss: 0.00015616673044860363 2023-01-23 23:42:23.966955: step: 128/77, loss: 1.8163501636081492e-06 2023-01-23 23:42:25.280893: step: 132/77, loss: 2.269335482196766e-06 2023-01-23 23:42:26.575465: step: 136/77, loss: 3.60607316451933e-07 2023-01-23 23:42:27.845787: step: 140/77, loss: 0.0011399483773857355 2023-01-23 23:42:29.153748: step: 144/77, loss: 0.00021700444631278515 2023-01-23 23:42:30.441311: step: 148/77, loss: 4.586233899317449e-06 2023-01-23 23:42:31.775840: step: 152/77, loss: 0.025929274037480354 2023-01-23 23:42:33.107335: step: 156/77, loss: 0.14960448443889618 2023-01-23 23:42:34.420957: step: 160/77, loss: 0.0028908923268318176 2023-01-23 23:42:35.685075: step: 164/77, loss: 1.9430433439993067e-06 2023-01-23 23:42:36.921245: step: 168/77, loss: 0.0003239882062189281 2023-01-23 23:42:38.199201: step: 172/77, loss: 0.023600086569786072 2023-01-23 23:42:39.533553: step: 176/77, loss: 2.9996070225024596e-05 2023-01-23 23:42:40.810712: step: 180/77, loss: 0.0023301143664866686 2023-01-23 23:42:42.098654: step: 184/77, loss: 6.973668291720969e-07 2023-01-23 23:42:43.405792: step: 188/77, loss: 4.741271823149873e-06 2023-01-23 23:42:44.681707: step: 192/77, loss: 2.3011994926491752e-05 2023-01-23 23:42:45.958245: step: 196/77, loss: 2.5616154744056985e-05 2023-01-23 23:42:47.275366: step: 200/77, loss: 0.00017460151866544038 2023-01-23 23:42:48.585769: step: 204/77, loss: 1.892438206141378e-07 2023-01-23 23:42:49.923870: step: 208/77, loss: 1.0356236543884734e-06 2023-01-23 23:42:51.252466: step: 212/77, loss: 0.011446312069892883 2023-01-23 23:42:52.559788: step: 216/77, loss: 0.02305716834962368 2023-01-23 23:42:53.843928: step: 
220/77, loss: 6.117016710049938e-06 2023-01-23 23:42:55.173879: step: 224/77, loss: 0.03381283953785896 2023-01-23 23:42:56.468315: step: 228/77, loss: 0.00010447140084579587 2023-01-23 23:42:57.758302: step: 232/77, loss: 0.002563286339864135 2023-01-23 23:42:59.095295: step: 236/77, loss: 2.2440067368734162e-06 2023-01-23 23:43:00.381077: step: 240/77, loss: 8.55996859172592e-06 2023-01-23 23:43:01.684226: step: 244/77, loss: 0.03666967898607254 2023-01-23 23:43:02.980263: step: 248/77, loss: 0.0009382938733324409 2023-01-23 23:43:04.305431: step: 252/77, loss: 1.1827602065750398e-05 2023-01-23 23:43:05.631431: step: 256/77, loss: 1.7730711988406256e-05 2023-01-23 23:43:06.930054: step: 260/77, loss: 0.015479068271815777 2023-01-23 23:43:08.186768: step: 264/77, loss: 1.5745154087198898e-05 2023-01-23 23:43:09.492574: step: 268/77, loss: 2.281257366121281e-06 2023-01-23 23:43:10.759524: step: 272/77, loss: 0.00013983561075292528 2023-01-23 23:43:12.019934: step: 276/77, loss: 1.2718570360448211e-05 2023-01-23 23:43:13.258462: step: 280/77, loss: 9.670064901001751e-05 2023-01-23 23:43:14.586946: step: 284/77, loss: 0.0005530952475965023 2023-01-23 23:43:15.875023: step: 288/77, loss: 0.0029514539055526257 2023-01-23 23:43:17.154842: step: 292/77, loss: 0.00041223972220905125 2023-01-23 23:43:18.494538: step: 296/77, loss: 0.016402151435613632 2023-01-23 23:43:19.847662: step: 300/77, loss: 7.928155355330091e-06 2023-01-23 23:43:21.116355: step: 304/77, loss: 0.0235972311347723 2023-01-23 23:43:22.409278: step: 308/77, loss: 0.141937717795372 2023-01-23 23:43:23.708259: step: 312/77, loss: 0.00022633300977759063 2023-01-23 23:43:25.006502: step: 316/77, loss: 4.3762211134890094e-06 2023-01-23 23:43:26.288812: step: 320/77, loss: 2.618359576445073e-05 2023-01-23 23:43:27.602829: step: 324/77, loss: 0.060009852051734924 2023-01-23 23:43:28.973819: step: 328/77, loss: 3.010000000358559e-07 2023-01-23 23:43:30.275903: step: 332/77, loss: 0.0031362068839371204 2023-01-23 23:43:31.587155: step: 336/77, loss: 9.342830935565871e-07 2023-01-23 23:43:32.884037: step: 340/77, loss: 2.3841704432925326e-07 2023-01-23 23:43:34.221153: step: 344/77, loss: 4.756555790663697e-05 2023-01-23 23:43:35.517887: step: 348/77, loss: 0.00014611869119107723 2023-01-23 23:43:36.849574: step: 352/77, loss: 3.068546357098967e-05 2023-01-23 23:43:38.130589: step: 356/77, loss: 0.000274586578598246 2023-01-23 23:43:39.456768: step: 360/77, loss: 2.220268413566373e-07 2023-01-23 23:43:40.768041: step: 364/77, loss: 0.009011710993945599 2023-01-23 23:43:42.071934: step: 368/77, loss: 2.03244894692034e-06 2023-01-23 23:43:43.352039: step: 372/77, loss: 3.11428630084265e-05 2023-01-23 23:43:44.605809: step: 376/77, loss: 8.270069429272553e-07 2023-01-23 23:43:45.866116: step: 380/77, loss: 1.7652571841608733e-05 2023-01-23 23:43:47.158276: step: 384/77, loss: 5.856064717590925e-07 2023-01-23 23:43:48.460519: step: 388/77, loss: 0.05170586705207825 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Chinese: {'template': {'p': 0.9855072463768116, 'r': 0.5190839694656488, 'f1': 0.68}, 'slot': {'p': 0.5, 'r': 0.014453477868112014, 'f1': 0.028094820017559263}, 'combined': 0.0191044776119403, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 
0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Korean: {'template': {'p': 0.9855072463768116, 'r': 0.5190839694656488, 'f1': 0.68}, 'slot': {'p': 0.4375, 'r': 0.012646793134598013, 'f1': 0.024582967515364356}, 'combined': 0.016716417910447763, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Russian: {'template': {'p': 0.9857142857142858, 'r': 0.5267175572519084, 'f1': 0.6865671641791045}, 'slot': {'p': 0.4375, 'r': 0.012646793134598013, 'f1': 0.024582967515364356}, 'combined': 0.01687785829413075, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 26 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 
--learning_rate 2e-4 2023-01-23 23:45:31.257571: step: 4/77, loss: 5.481765583681408e-06 2023-01-23 23:45:32.525943: step: 8/77, loss: 7.747093150101136e-06 2023-01-23 23:45:33.807183: step: 12/77, loss: 7.727561023784801e-05 2023-01-23 23:45:35.102055: step: 16/77, loss: 5.4686453950125724e-05 2023-01-23 23:45:36.323880: step: 20/77, loss: 1.919623537105508e-05 2023-01-23 23:45:37.664716: step: 24/77, loss: 1.2736864846374374e-05 2023-01-23 23:45:38.945787: step: 28/77, loss: 0.00030758618959225714 2023-01-23 23:45:40.271547: step: 32/77, loss: 1.3266368114273064e-05 2023-01-23 23:45:41.560565: step: 36/77, loss: 2.479479917383287e-06 2023-01-23 23:45:42.833897: step: 40/77, loss: 0.00022657167573925108 2023-01-23 23:45:44.131991: step: 44/77, loss: 0.0003512536932248622 2023-01-23 23:45:45.444413: step: 48/77, loss: 0.0001523706887383014 2023-01-23 23:45:46.733349: step: 52/77, loss: 3.3064022773032775e-06 2023-01-23 23:45:48.034920: step: 56/77, loss: 2.327267975488212e-05 2023-01-23 23:45:49.311471: step: 60/77, loss: 4.443985380930826e-05 2023-01-23 23:45:50.641556: step: 64/77, loss: 4.193187123746611e-05 2023-01-23 23:45:51.979070: step: 68/77, loss: 5.6550146837253124e-05 2023-01-23 23:45:53.278201: step: 72/77, loss: 6.289097655098885e-05 2023-01-23 23:45:54.563273: step: 76/77, loss: 6.619805208174512e-05 2023-01-23 23:45:55.875859: step: 80/77, loss: 8.180631994036958e-07 2023-01-23 23:45:57.178542: step: 84/77, loss: 0.0005231473478488624 2023-01-23 23:45:58.493196: step: 88/77, loss: 5.478812818182632e-05 2023-01-23 23:45:59.799834: step: 92/77, loss: 1.8045072920358507e-06 2023-01-23 23:46:01.128480: step: 96/77, loss: 0.06642985343933105 2023-01-23 23:46:02.422838: step: 100/77, loss: 4.917359888167994e-07 2023-01-23 23:46:03.666464: step: 104/77, loss: 1.1980254157606396e-06 2023-01-23 23:46:04.969387: step: 108/77, loss: 4.111695307074115e-05 2023-01-23 23:46:06.263919: step: 112/77, loss: 1.090757336896786e-06 2023-01-23 23:46:07.545414: step: 116/77, loss: 6.899875734234229e-05 2023-01-23 23:46:08.815143: step: 120/77, loss: 0.00800521019846201 2023-01-23 23:46:10.113135: step: 124/77, loss: 3.220645885448903e-05 2023-01-23 23:46:11.414737: step: 128/77, loss: 0.0010096518089994788 2023-01-23 23:46:12.715569: step: 132/77, loss: 2.1828752778674243e-06 2023-01-23 23:46:14.025746: step: 136/77, loss: 7.152478360694658e-07 2023-01-23 23:46:15.342252: step: 140/77, loss: 0.00018325047858525068 2023-01-23 23:46:16.612062: step: 144/77, loss: 1.4789173292228952e-05 2023-01-23 23:46:17.892071: step: 148/77, loss: 0.0003508915542624891 2023-01-23 23:46:19.269892: step: 152/77, loss: 0.021963827311992645 2023-01-23 23:46:20.547977: step: 156/77, loss: 0.0004927744157612324 2023-01-23 23:46:21.844907: step: 160/77, loss: 0.0031166928820312023 2023-01-23 23:46:23.158185: step: 164/77, loss: 4.25084681410226e-06 2023-01-23 23:46:24.463721: step: 168/77, loss: 0.0006452484522014856 2023-01-23 23:46:25.743001: step: 172/77, loss: 7.3308078754052985e-06 2023-01-23 23:46:27.060664: step: 176/77, loss: 3.883035788021516e-06 2023-01-23 23:46:28.387014: step: 180/77, loss: 0.004640428815037012 2023-01-23 23:46:29.702864: step: 184/77, loss: 0.05861657112836838 2023-01-23 23:46:31.018258: step: 188/77, loss: 0.02396177127957344 2023-01-23 23:46:32.267391: step: 192/77, loss: 2.979371856781654e-05 2023-01-23 23:46:33.533636: step: 196/77, loss: 0.00015523187175858766 2023-01-23 23:46:34.905398: step: 200/77, loss: 5.799734481115593e-06 2023-01-23 23:46:36.208430: step: 204/77, loss: 
0.00017246135394088924 2023-01-23 23:46:37.549252: step: 208/77, loss: 0.000321808154694736 2023-01-23 23:46:38.864274: step: 212/77, loss: 6.005029149491747e-07 2023-01-23 23:46:40.157427: step: 216/77, loss: 2.1190633560763672e-05 2023-01-23 23:46:41.464602: step: 220/77, loss: 0.004656767938286066 2023-01-23 23:46:42.756897: step: 224/77, loss: 0.0017464417032897472 2023-01-23 23:46:44.002219: step: 228/77, loss: 0.0005598579300567508 2023-01-23 23:46:45.335819: step: 232/77, loss: 0.00011804819223470986 2023-01-23 23:46:46.649109: step: 236/77, loss: 0.004002984147518873 2023-01-23 23:46:47.963659: step: 240/77, loss: 0.00037862331373617053 2023-01-23 23:46:49.269983: step: 244/77, loss: 0.00023899652296677232 2023-01-23 23:46:50.570702: step: 248/77, loss: 0.001696170074865222 2023-01-23 23:46:51.857860: step: 252/77, loss: 0.00019286083988845348 2023-01-23 23:46:53.124156: step: 256/77, loss: 0.001271429588086903 2023-01-23 23:46:54.416942: step: 260/77, loss: 0.0024752700701355934 2023-01-23 23:46:55.719316: step: 264/77, loss: 0.0002661732432898134 2023-01-23 23:46:56.987571: step: 268/77, loss: 7.176605868153274e-05 2023-01-23 23:46:58.305328: step: 272/77, loss: 0.006966698449105024 2023-01-23 23:46:59.562062: step: 276/77, loss: 0.0004718205891549587 2023-01-23 23:47:00.888006: step: 280/77, loss: 2.8281247068662196e-06 2023-01-23 23:47:02.176706: step: 284/77, loss: 0.00025430243113078177 2023-01-23 23:47:03.496234: step: 288/77, loss: 3.74128330804524e-06 2023-01-23 23:47:04.810461: step: 292/77, loss: 0.002602202817797661 2023-01-23 23:47:06.093847: step: 296/77, loss: 9.79024753178237e-06 2023-01-23 23:47:07.360406: step: 300/77, loss: 9.252102245227434e-06 2023-01-23 23:47:08.632422: step: 304/77, loss: 7.802544860169291e-05 2023-01-23 23:47:09.919804: step: 308/77, loss: 9.53261333052069e-05 2023-01-23 23:47:11.272685: step: 312/77, loss: 1.3244408364698756e-05 2023-01-23 23:47:12.589953: step: 316/77, loss: 0.01857200264930725 2023-01-23 23:47:13.954863: step: 320/77, loss: 9.210885764332488e-06 2023-01-23 23:47:15.217601: step: 324/77, loss: 0.0003791408962570131 2023-01-23 23:47:16.493632: step: 328/77, loss: 2.8768197807949036e-05 2023-01-23 23:47:17.810347: step: 332/77, loss: 1.5513876860495657e-05 2023-01-23 23:47:19.093829: step: 336/77, loss: 0.000338991463650018 2023-01-23 23:47:20.405033: step: 340/77, loss: 0.0005667489604093134 2023-01-23 23:47:21.759734: step: 344/77, loss: 0.003694930812343955 2023-01-23 23:47:23.092762: step: 348/77, loss: 4.037413600599393e-05 2023-01-23 23:47:24.357386: step: 352/77, loss: 0.02028324268758297 2023-01-23 23:47:25.677804: step: 356/77, loss: 3.3076944419008214e-06 2023-01-23 23:47:26.952253: step: 360/77, loss: 9.149199513558415e-07 2023-01-23 23:47:28.308348: step: 364/77, loss: 0.011678007431328297 2023-01-23 23:47:29.591162: step: 368/77, loss: 0.0001280040160054341 2023-01-23 23:47:30.888541: step: 372/77, loss: 9.730198371471488e-07 2023-01-23 23:47:32.169258: step: 376/77, loss: 5.811441994296729e-08 2023-01-23 23:47:33.449625: step: 380/77, loss: 6.0846696214866824e-06 2023-01-23 23:47:34.752966: step: 384/77, loss: 8.547366451239213e-05 2023-01-23 23:47:36.071499: step: 388/77, loss: 0.00029323625494726 ================================================== Loss: 0.003 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 26} Test Chinese: {'template': 
{'p': 0.9696969696969697, 'r': 0.48854961832061067, 'f1': 0.6497461928934011}, 'slot': {'p': 0.3103448275862069, 'r': 0.008130081300813009, 'f1': 0.015845070422535214}, 'combined': 0.01029527418317009, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.04662219154972779, 'epoch': 26} Test Korean: {'template': {'p': 0.9701492537313433, 'r': 0.4961832061068702, 'f1': 0.6565656565656566}, 'slot': {'p': 0.3103448275862069, 'r': 0.008130081300813009, 'f1': 0.015845070422535214}, 'combined': 0.010403329065300898, 'epoch': 26} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.48717948717948717, 'r': 0.035916824196597356, 'f1': 0.06690140845070423}, 'combined': 0.04839676356008391, 'epoch': 26} Test Russian: {'template': {'p': 0.9701492537313433, 'r': 0.4961832061068702, 'f1': 0.6565656565656566}, 'slot': {'p': 0.32142857142857145, 'r': 0.008130081300813009, 'f1': 0.01585903083700441}, 'combined': 0.010412494993992794, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 
0.04301075268817204, 'epoch': 4} ****************************** Epoch: 27 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:49:18.853943: step: 4/77, loss: 2.7505857360665686e-05 2023-01-23 23:49:20.155738: step: 8/77, loss: 0.0011554948287084699 2023-01-23 23:49:21.477914: step: 12/77, loss: 1.7303980712313205e-05 2023-01-23 23:49:22.742989: step: 16/77, loss: 1.1186986739630811e-05 2023-01-23 23:49:24.081483: step: 20/77, loss: 0.0033815372735261917 2023-01-23 23:49:25.379666: step: 24/77, loss: 2.60449974121002e-06 2023-01-23 23:49:26.678950: step: 28/77, loss: 0.004366590641438961 2023-01-23 23:49:27.959159: step: 32/77, loss: 1.6641301044728607e-05 2023-01-23 23:49:29.205974: step: 36/77, loss: 2.533194809473116e-08 2023-01-23 23:49:30.465694: step: 40/77, loss: 0.0004364684864412993 2023-01-23 23:49:31.802302: step: 44/77, loss: 1.937149818331818e-08 2023-01-23 23:49:33.091425: step: 48/77, loss: 1.1250177749388968e-06 2023-01-23 23:49:34.412612: step: 52/77, loss: 0.0012814695946872234 2023-01-23 23:49:35.713697: step: 56/77, loss: 3.8742774677302805e-07 2023-01-23 23:49:37.009797: step: 60/77, loss: 2.1708592612412758e-05 2023-01-23 23:49:38.279046: step: 64/77, loss: 3.951329290430294e-06 2023-01-23 23:49:39.562424: step: 68/77, loss: 1.3965031939733308e-05 2023-01-23 23:49:40.835261: step: 72/77, loss: 4.4106669520260766e-05 2023-01-23 23:49:42.166098: step: 76/77, loss: 0.017457332462072372 2023-01-23 23:49:43.424312: step: 80/77, loss: 5.629240331472829e-05 2023-01-23 23:49:44.715662: step: 84/77, loss: 0.00011551618081284687 2023-01-23 23:49:45.970514: step: 88/77, loss: 5.334536581358407e-07 2023-01-23 23:49:47.279169: step: 92/77, loss: 6.884167760290438e-07 2023-01-23 23:49:48.569237: step: 96/77, loss: 1.5734174667159095e-05 2023-01-23 23:49:49.877400: step: 100/77, loss: 0.00032483390532433987 2023-01-23 23:49:51.144832: step: 104/77, loss: 1.1384234994693543e-06 2023-01-23 23:49:52.487837: step: 108/77, loss: 0.03720640763640404 2023-01-23 23:49:53.750710: step: 112/77, loss: 4.27724517066963e-05 2023-01-23 23:49:55.040354: step: 116/77, loss: 0.00010154680057894439 2023-01-23 23:49:56.329201: step: 120/77, loss: 1.4752107801996317e-07 2023-01-23 23:49:57.624762: step: 124/77, loss: 0.00013078686606604606 2023-01-23 23:49:58.896936: step: 128/77, loss: 2.6672984176911996e-07 2023-01-23 23:50:00.153174: step: 132/77, loss: 0.00040122828795574605 2023-01-23 23:50:01.396396: step: 136/77, loss: 1.206991413482683e-07 2023-01-23 23:50:02.693177: step: 140/77, loss: 2.5793294753384544e-06 2023-01-23 23:50:04.012075: step: 144/77, loss: 2.38417555920023e-07 2023-01-23 23:50:05.308849: step: 148/77, loss: 2.6746415642264765e-06 2023-01-23 23:50:06.564330: step: 152/77, loss: 1.2441074431990273e-05 2023-01-23 23:50:07.861038: step: 156/77, loss: 0.0029361483175307512 2023-01-23 23:50:09.200492: step: 160/77, loss: 0.08445204049348831 2023-01-23 23:50:10.484643: step: 164/77, loss: 0.028432684019207954 2023-01-23 23:50:11.787779: step: 168/77, loss: 6.019993747941044e-07 2023-01-23 23:50:13.075755: step: 172/77, loss: 0.0037985225208103657 2023-01-23 23:50:14.380155: step: 176/77, loss: 3.0629413231508806e-05 2023-01-23 23:50:15.709293: step: 180/77, loss: 3.397369027879904e-06 2023-01-23 23:50:17.009621: step: 184/77, loss: 0.06735138595104218 2023-01-23 23:50:18.321005: step: 188/77, loss: 
2.424426202196628e-05 2023-01-23 23:50:19.587887: step: 192/77, loss: 0.040859125554561615 2023-01-23 23:50:20.877963: step: 196/77, loss: 0.005081044510006905 2023-01-23 23:50:22.235416: step: 200/77, loss: 1.019201590679586e-05 2023-01-23 23:50:23.510040: step: 204/77, loss: 0.00355838006362319 2023-01-23 23:50:24.830788: step: 208/77, loss: 0.030417632311582565 2023-01-23 23:50:26.168525: step: 212/77, loss: 0.0011557607213035226 2023-01-23 23:50:27.460815: step: 216/77, loss: 0.00011477222142275423 2023-01-23 23:50:28.736704: step: 220/77, loss: 0.041373834013938904 2023-01-23 23:50:30.046213: step: 224/77, loss: 0.022509494796395302 2023-01-23 23:50:31.378143: step: 228/77, loss: 0.00010602780093904585 2023-01-23 23:50:32.676713: step: 232/77, loss: 2.339477020996128e-07 2023-01-23 23:50:33.978222: step: 236/77, loss: 0.004461529199033976 2023-01-23 23:50:35.264605: step: 240/77, loss: 2.2500658758417558e-07 2023-01-23 23:50:36.531907: step: 244/77, loss: 1.192092646817855e-08 2023-01-23 23:50:37.827239: step: 248/77, loss: 0.21817706525325775 2023-01-23 23:50:39.180771: step: 252/77, loss: 0.001352312508970499 2023-01-23 23:50:40.506264: step: 256/77, loss: 6.764993258912e-07 2023-01-23 23:50:41.838923: step: 260/77, loss: 0.00010967526759486645 2023-01-23 23:50:43.147554: step: 264/77, loss: 0.008482849225401878 2023-01-23 23:50:44.525975: step: 268/77, loss: 1.469506969442591e-05 2023-01-23 23:50:45.870612: step: 272/77, loss: 0.0027691826689988375 2023-01-23 23:50:47.189614: step: 276/77, loss: 0.0015755686908960342 2023-01-23 23:50:48.484229: step: 280/77, loss: 2.8417249268386513e-05 2023-01-23 23:50:49.831437: step: 284/77, loss: 1.1840852494060528e-05 2023-01-23 23:50:51.147317: step: 288/77, loss: 0.004036055412143469 2023-01-23 23:50:52.436527: step: 292/77, loss: 0.00029007441480644047 2023-01-23 23:50:53.706961: step: 296/77, loss: 0.01541918981820345 2023-01-23 23:50:55.035154: step: 300/77, loss: 0.00037965853698551655 2023-01-23 23:50:56.307289: step: 304/77, loss: 0.00203267065808177 2023-01-23 23:50:57.605382: step: 308/77, loss: 0.008641621097922325 2023-01-23 23:50:58.925249: step: 312/77, loss: 0.0005436694482341409 2023-01-23 23:51:00.180858: step: 316/77, loss: 4.508683105086675e-06 2023-01-23 23:51:01.437176: step: 320/77, loss: 1.2735614291159436e-05 2023-01-23 23:51:02.737011: step: 324/77, loss: 0.0005036424263380468 2023-01-23 23:51:04.053110: step: 328/77, loss: 2.058391328318976e-05 2023-01-23 23:51:05.313273: step: 332/77, loss: 5.881480683456175e-05 2023-01-23 23:51:06.582352: step: 336/77, loss: 6.234457396203652e-05 2023-01-23 23:51:07.882948: step: 340/77, loss: 3.680566464936419e-07 2023-01-23 23:51:09.132207: step: 344/77, loss: 2.4716209736652672e-05 2023-01-23 23:51:10.513959: step: 348/77, loss: 0.006677013821899891 2023-01-23 23:51:11.869757: step: 352/77, loss: 8.443155093118548e-05 2023-01-23 23:51:13.197426: step: 356/77, loss: 0.009522883221507072 2023-01-23 23:51:14.489437: step: 360/77, loss: 1.6093220267521247e-07 2023-01-23 23:51:15.794599: step: 364/77, loss: 0.0004596296639647335 2023-01-23 23:51:17.087126: step: 368/77, loss: 2.1775504137622193e-05 2023-01-23 23:51:18.418822: step: 372/77, loss: 7.584843388031004e-06 2023-01-23 23:51:19.771074: step: 376/77, loss: 0.05923104286193848 2023-01-23 23:51:21.057756: step: 380/77, loss: 0.05634100362658501 2023-01-23 23:51:22.364969: step: 384/77, loss: 0.010661378502845764 2023-01-23 23:51:23.657190: step: 388/77, loss: 0.04150150343775749 ================================================== 
Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 27} Test Chinese: {'template': {'p': 0.935064935064935, 'r': 0.549618320610687, 'f1': 0.6923076923076923}, 'slot': {'p': 0.4, 'r': 0.012646793134598013, 'f1': 0.0245183887915937}, 'combined': 0.01697426916341102, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 27} Test Korean: {'template': {'p': 0.9358974358974359, 'r': 0.5572519083969466, 'f1': 0.6985645933014354}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.018335028695575736, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 27} Test Russian: {'template': {'p': 0.9358974358974359, 'r': 0.5572519083969466, 'f1': 0.6985645933014354}, 'slot': {'p': 0.40540540540540543, 'r': 0.013550135501355014, 'f1': 0.026223776223776227}, 'combined': 0.01831900157259009, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': 
{'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 28 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:53:06.524288: step: 4/77, loss: 9.64068476605462e-07 2023-01-23 23:53:07.791576: step: 8/77, loss: 2.7865053198183887e-07 2023-01-23 23:53:09.036519: step: 12/77, loss: 1.0566669516265392e-05 2023-01-23 23:53:10.331371: step: 16/77, loss: 1.6584285731369164e-06 2023-01-23 23:53:11.604049: step: 20/77, loss: 0.00015290960436686873 2023-01-23 23:53:12.883291: step: 24/77, loss: 0.003574878443032503 2023-01-23 23:53:14.159661: step: 28/77, loss: 1.589842577232048e-05 2023-01-23 23:53:15.462875: step: 32/77, loss: 3.6132554669165984e-05 2023-01-23 23:53:16.791965: step: 36/77, loss: 0.06018991023302078 2023-01-23 23:53:18.095560: step: 40/77, loss: 1.1510334843478631e-05 2023-01-23 23:53:19.375203: step: 44/77, loss: 0.0002839084481820464 2023-01-23 23:53:20.643605: step: 48/77, loss: 0.006246471311897039 2023-01-23 23:53:21.865322: step: 52/77, loss: 7.348742201429559e-06 2023-01-23 23:53:23.175073: step: 56/77, loss: 0.00037151321885176003 2023-01-23 23:53:24.463343: step: 60/77, loss: 6.909019248269033e-06 2023-01-23 23:53:25.753526: step: 64/77, loss: 0.002263585338369012 2023-01-23 23:53:27.042184: step: 68/77, loss: 5.272985617921222e-06 2023-01-23 23:53:28.317344: step: 72/77, loss: 0.03045172430574894 2023-01-23 23:53:29.630580: step: 76/77, loss: 1.8863595414586598e-06 2023-01-23 23:53:30.928314: step: 80/77, loss: 0.02143908478319645 2023-01-23 23:53:32.242242: step: 84/77, loss: 4.5710938138654456e-05 2023-01-23 23:53:33.540326: step: 88/77, loss: 0.0002125641331076622 2023-01-23 23:53:34.785690: step: 92/77, loss: 5.6164758461818565e-06 2023-01-23 23:53:36.055215: step: 96/77, loss: 0.00024129284429363906 2023-01-23 23:53:37.330468: step: 100/77, loss: 0.006555612199008465 2023-01-23 23:53:38.610713: step: 104/77, loss: 0.008127989247441292 2023-01-23 23:53:39.891239: step: 108/77, loss: 1.2938665349793155e-05 2023-01-23 23:53:41.172909: step: 112/77, loss: 1.1773166988859884e-05 2023-01-23 23:53:42.453211: step: 116/77, loss: 7.165558599808719e-06 2023-01-23 23:53:43.730740: step: 120/77, loss: 0.0068009719252586365 2023-01-23 23:53:45.047121: step: 124/77, loss: 0.004748777486383915 2023-01-23 23:53:46.394164: step: 128/77, loss: 1.4603058673401392e-07 2023-01-23 23:53:47.691560: step: 132/77, loss: 9.514651901554316e-05 2023-01-23 23:53:48.973596: step: 136/77, loss: 0.0013951624277979136 2023-01-23 23:53:50.268474: step: 140/77, loss: 1.3904502338846214e-05 2023-01-23 23:53:51.562625: step: 144/77, loss: 0.014372942969202995 2023-01-23 23:53:52.860651: step: 148/77, loss: 1.192092380364329e-08 2023-01-23 23:53:54.121257: step: 152/77, loss: 0.00029474348411895335 2023-01-23 23:53:55.395559: step: 156/77, loss: 5.219217200647108e-06 2023-01-23 23:53:56.687256: step: 160/77, loss: 4.321329427625642e-08 2023-01-23 23:53:57.974105: step: 164/77, loss: 6.13247902947478e-05 2023-01-23 23:53:59.256393: step: 168/77, loss: 1.291885155296768e-06 2023-01-23 23:54:00.530361: step: 172/77, loss: 0.00014403194654732943 
2023-01-23 23:54:01.872887: step: 176/77, loss: 0.00023046269780024886 2023-01-23 23:54:03.179225: step: 180/77, loss: 1.6256115031865193e-06 2023-01-23 23:54:04.474953: step: 184/77, loss: 6.258481732857035e-08 2023-01-23 23:54:05.749319: step: 188/77, loss: 3.6979461128794355e-06 2023-01-23 23:54:07.022854: step: 192/77, loss: 0.001247554668225348 2023-01-23 23:54:08.325849: step: 196/77, loss: 0.00020255711569916457 2023-01-23 23:54:09.642649: step: 200/77, loss: 0.00019485583470668644 2023-01-23 23:54:10.948862: step: 204/77, loss: 4.544795331185014e-07 2023-01-23 23:54:12.217889: step: 208/77, loss: 3.360617483849637e-05 2023-01-23 23:54:13.491313: step: 212/77, loss: 0.034614551812410355 2023-01-23 23:54:14.856402: step: 216/77, loss: 0.0002035450015682727 2023-01-23 23:54:16.148476: step: 220/77, loss: 0.0003514665877446532 2023-01-23 23:54:17.463122: step: 224/77, loss: 4.072986121173017e-05 2023-01-23 23:54:18.799404: step: 228/77, loss: 3.471943159638613e-07 2023-01-23 23:54:20.145043: step: 232/77, loss: 0.00031286414014175534 2023-01-23 23:54:21.459489: step: 236/77, loss: 0.0006258451612666249 2023-01-23 23:54:22.711378: step: 240/77, loss: 6.650136583630228e-06 2023-01-23 23:54:23.989072: step: 244/77, loss: 2.9312335755093955e-05 2023-01-23 23:54:25.347871: step: 248/77, loss: 1.4918949091224931e-05 2023-01-23 23:54:26.626928: step: 252/77, loss: 7.763389362480666e-07 2023-01-23 23:54:27.941421: step: 256/77, loss: 1.8924350797533407e-07 2023-01-23 23:54:29.281001: step: 260/77, loss: 6.862991722300649e-05 2023-01-23 23:54:30.659987: step: 264/77, loss: 5.841187658006675e-07 2023-01-23 23:54:31.956010: step: 268/77, loss: 0.015060674399137497 2023-01-23 23:54:33.259266: step: 272/77, loss: 2.6076759240822867e-07 2023-01-23 23:54:34.588350: step: 276/77, loss: 1.5377199815702625e-06 2023-01-23 23:54:35.819642: step: 280/77, loss: 7.972068942763144e-07 2023-01-23 23:54:37.115424: step: 284/77, loss: 1.070262078428641e-05 2023-01-23 23:54:38.448121: step: 288/77, loss: 0.00015180776244960725 2023-01-23 23:54:39.760741: step: 292/77, loss: 0.0358809158205986 2023-01-23 23:54:41.049335: step: 296/77, loss: 4.167438873992069e-06 2023-01-23 23:54:42.355242: step: 300/77, loss: 2.512177843527752e-06 2023-01-23 23:54:43.674346: step: 304/77, loss: 0.04607342183589935 2023-01-23 23:54:44.986661: step: 308/77, loss: 1.1491039003885817e-05 2023-01-23 23:54:46.216529: step: 312/77, loss: 8.81724372447934e-06 2023-01-23 23:54:47.522419: step: 316/77, loss: 1.818268356146291e-05 2023-01-23 23:54:48.840421: step: 320/77, loss: 0.00010696750541683286 2023-01-23 23:54:50.123108: step: 324/77, loss: 0.0004945929395034909 2023-01-23 23:54:51.450736: step: 328/77, loss: 0.000450802588602528 2023-01-23 23:54:52.727949: step: 332/77, loss: 0.00888136588037014 2023-01-23 23:54:54.009228: step: 336/77, loss: 9.238691234259022e-08 2023-01-23 23:54:55.290003: step: 340/77, loss: 0.000297154561849311 2023-01-23 23:54:56.634984: step: 344/77, loss: 7.912410637800349e-07 2023-01-23 23:54:57.897407: step: 348/77, loss: 4.245068339514546e-06 2023-01-23 23:54:59.184736: step: 352/77, loss: 9.834380989559577e-07 2023-01-23 23:55:00.456386: step: 356/77, loss: 0.015564743429422379 2023-01-23 23:55:01.785676: step: 360/77, loss: 0.0002977493277285248 2023-01-23 23:55:03.114280: step: 364/77, loss: 2.9537855880334973e-05 2023-01-23 23:55:04.406949: step: 368/77, loss: 8.240547322202474e-05 2023-01-23 23:55:05.681233: step: 372/77, loss: 0.0 2023-01-23 23:55:06.986180: step: 376/77, loss: 0.001022365759126842 
2023-01-23 23:55:08.323407: step: 380/77, loss: 2.960517349492875e-06 2023-01-23 23:55:09.694252: step: 384/77, loss: 1.3913253496866673e-05 2023-01-23 23:55:10.993003: step: 388/77, loss: 0.00022609457664657384 ================================================== Loss: 0.003 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 28} Test Chinese: {'template': {'p': 0.9436619718309859, 'r': 0.5114503816793893, 'f1': 0.6633663366336633}, 'slot': {'p': 0.42857142857142855, 'r': 0.013550135501355014, 'f1': 0.02626970227670753}, 'combined': 0.017426436163756477, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 28} Test Korean: {'template': {'p': 0.9436619718309859, 'r': 0.5114503816793893, 'f1': 0.6633663366336633}, 'slot': {'p': 0.42424242424242425, 'r': 0.012646793134598013, 'f1': 0.024561403508771933}, 'combined': 0.01629320826819524, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28} Test Russian: {'template': {'p': 0.9577464788732394, 'r': 0.5190839694656488, 'f1': 0.6732673267326732}, 'slot': {'p': 0.4411764705882353, 'r': 0.013550135501355014, 'f1': 0.02629272567922875}, 'combined': 0.01770203313056985, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 28} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- 
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4}
Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4}
******************************
Epoch: 29
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-23 23:56:53.796816: step: 4/77, loss: 7.4505797087454084e-09
2023-01-23 23:56:55.081421: step: 8/77, loss: 6.965329521335661e-05
2023-01-23 23:56:56.399332: step: 12/77, loss: 0.03225981444120407
2023-01-23 23:56:57.667415: step: 16/77, loss: 5.960463234089275e-09
2023-01-23 23:56:58.968250: step: 20/77, loss: 0.0004558908985927701
2023-01-23 23:57:00.248610: step: 24/77, loss: 0.0009515452547930181
2023-01-23 23:57:01.532135: step: 28/77, loss: 0.012123556807637215
2023-01-23 23:57:02.831235: step: 32/77, loss: 3.725286745748235e-08
2023-01-23 23:57:04.163752: step: 36/77, loss: 9.973219857783988e-05
2023-01-23 23:57:05.491534: step: 40/77, loss: 4.164739584666677e-05
2023-01-23 23:57:06.818997: step: 44/77, loss: 0.0001398369495291263
2023-01-23 23:57:08.057795: step: 48/77, loss: 0.0
2023-01-23 23:57:09.338844: step: 52/77, loss: 0.002460264600813389
2023-01-23 23:57:10.634626: step: 56/77, loss: 6.1242428728292e-07
2023-01-23 23:57:11.936802: step: 60/77, loss: 1.2546159950943547e-06
2023-01-23 23:57:13.248915: step: 64/77, loss: 0.00239675329066813
2023-01-23 23:57:14.594015: step: 68/77, loss: 1.627564779482782e-05
2023-01-23 23:57:15.918833: step: 72/77, loss: 1.3729930287809111e-05
2023-01-23 23:57:17.214576: step: 76/77, loss: 0.00010529484279686585
2023-01-23 23:57:18.487959: step: 80/77, loss: 3.3361757232341915e-05
2023-01-23 23:57:19.767227: step: 84/77, loss: 0.002414719434455037
2023-01-23 23:57:21.065100: step: 88/77, loss: 5.140843768458581e-07
2023-01-23 23:57:22.389858: step: 92/77, loss: 0.20920942723751068
2023-01-23 23:57:23.685966: step: 96/77, loss: 1.2781938494299538e-05
2023-01-23 23:57:24.980041: step: 100/77, loss: 0.0014965697191655636
2023-01-23 23:57:26.262587: step: 104/77, loss: 1.2248308394191554e-06
2023-01-23 23:57:27.518009: step: 108/77, loss: 0.029241599142551422
2023-01-23 23:57:28.802878: step: 112/77, loss: 1.4289505543274572e-06
2023-01-23 23:57:30.058929: step: 116/77, loss: 1.4232809917302802e-05
2023-01-23 23:57:31.353219: step: 120/77, loss: 4.231874015658832e-07
2023-01-23 23:57:32.688943: step: 124/77, loss: 2.9802318390892424e-09
2023-01-23 23:57:33.962747: step: 128/77, loss: 1.923632453326718e-06
2023-01-23 23:57:35.236180: step: 132/77, loss: 5.751722937930026e-07
2023-01-23 23:57:36.492403: step: 136/77, loss: 5.915730412198172e-07
2023-01-23 23:57:37.782802: step: 140/77, loss: 0.00011380821524653584
2023-01-23 23:57:39.083780: step: 144/77, loss: 8.085298759397119e-05
2023-01-23 23:57:40.399378: step: 148/77, loss: 0.00672421557828784
2023-01-23 23:57:41.718849: step: 152/77, loss: 1.7091322206397308e-06
2023-01-23 23:57:42.995741: step: 156/77, loss: 1.7865913832793012e-06
2023-01-23 23:57:44.268694: step: 160/77, loss: 7.068046397762373e-05
2023-01-23 23:57:45.582113: step: 164/77, loss: 7.897369869169779e-07
2023-01-23 23:57:46.945397: step: 168/77, loss: 3.203735445822531e-07
2023-01-23 23:57:48.243037: step: 172/77, loss: 2.61657555711281e-06
2023-01-23 23:57:49.538066: step: 176/77, loss: 3.048412054340588e-06
2023-01-23 23:57:50.799545: step: 180/77, loss: 5.527518624148797e-06
2023-01-23 23:57:52.074255: step: 184/77, loss: 3.007160012202803e-05
2023-01-23 23:57:53.418225: step: 188/77, loss: 2.419860720692668e-06
2023-01-23 23:57:54.726530: step: 192/77, loss: 7.220292172860354e-05
2023-01-23 23:57:56.027587: step: 196/77, loss: 2.734985901042819e-05
2023-01-23 23:57:57.334867: step: 200/77, loss: 2.2843109945824835e-06
2023-01-23 23:57:58.628732: step: 204/77, loss: 0.00011313649883959442
2023-01-23 23:57:59.963721: step: 208/77, loss: 0.00021175568690523505
2023-01-23 23:58:01.249769: step: 212/77, loss: 3.129242287513989e-08
2023-01-23 23:58:02.543774: step: 216/77, loss: 6.079644663259387e-07
2023-01-23 23:58:03.847291: step: 220/77, loss: 0.0003149434342049062
2023-01-23 23:58:05.174077: step: 224/77, loss: 0.0005252505652606487
2023-01-23 23:58:06.416451: step: 228/77, loss: 0.0004992748727090657
2023-01-23 23:58:07.736842: step: 232/77, loss: 0.014604151248931885
2023-01-23 23:58:09.022570: step: 236/77, loss: 1.513875190539693e-06
2023-01-23 23:58:10.362835: step: 240/77, loss: 9.912311725202017e-06
2023-01-23 23:58:11.616794: step: 244/77, loss: 1.54368092353252e-06
2023-01-23 23:58:12.949051: step: 248/77, loss: 0.01198370661586523
2023-01-23 23:58:14.238594: step: 252/77, loss: 0.00092380988644436
2023-01-23 23:58:15.560943: step: 256/77, loss: 3.4029815196845448e-06
2023-01-23 23:58:16.904456: step: 260/77, loss: 0.00012798003444913775
2023-01-23 23:58:18.212433: step: 264/77, loss: 8.236154826590791e-06
2023-01-23 23:58:19.533813: step: 268/77, loss: 0.0007766756461933255
2023-01-23 23:58:20.817433: step: 272/77, loss: 0.012719300575554371
2023-01-23 23:58:22.106382: step: 276/77, loss: 0.0002160976582672447
2023-01-23 23:58:23.387006: step: 280/77, loss: 0.12471464276313782
2023-01-23 23:58:24.668910: step: 284/77, loss: 4.7619501856388524e-05
2023-01-23 23:58:25.985005: step: 288/77, loss: 3.392129292478785e-05
2023-01-23 23:58:27.324358: step: 292/77, loss: 1.928166739162407e-06
2023-01-23 23:58:28.639082: step: 296/77, loss: 0.048692576587200165
2023-01-23 23:58:29.911315: step: 300/77, loss: 0.0004496439069043845
2023-01-23 23:58:31.198116: step: 304/77, loss: 0.009658782742917538
2023-01-23 23:58:32.485828: step: 308/77, loss: 4.896618702332489e-05
2023-01-23 23:58:33.760705: step: 312/77, loss: 0.018360355868935585
2023-01-23 23:58:35.065445: step: 316/77, loss: 1.70316445746721e-06
2023-01-23 23:58:36.348165: step: 320/77, loss: 5.35187200512155e-06
2023-01-23 23:58:37.660737: step: 324/77, loss: 0.0013014284195378423
2023-01-23 23:58:38.968153: step: 328/77, loss: 6.562308044522069e-06
2023-01-23 23:58:40.275163: step: 332/77, loss: 2.9375285521382466e-05
2023-01-23 23:58:41.618976: step: 336/77, loss: 7.343215111177415e-05
2023-01-23 23:58:42.946009: step: 340/77, loss: 0.0003839013515971601
2023-01-23 23:58:44.239023: step: 344/77, loss: 5.2280905947554857e-05
2023-01-23 23:58:45.541524: step: 348/77, loss: 0.0002265849761897698
2023-01-23 23:58:46.901149: step: 352/77, loss: 0.00012757748481817544
2023-01-23 23:58:48.224884: step: 356/77, loss: 0.0004892974975518882
2023-01-23 23:58:49.572685: step: 360/77, loss: 0.04070931300520897
2023-01-23 23:58:50.820070: step: 364/77, loss: 0.00010159768862649798
2023-01-23 23:58:52.112631: step: 368/77, loss: 0.0039003193378448486
2023-01-23 23:58:53.406235: step: 372/77, loss: 0.0008470588945783675
2023-01-23 23:58:54.701733: step: 376/77, loss: 0.03480779007077217
2023-01-23 23:58:55.965277: step: 380/77, loss: 6.759603274986148e-05
2023-01-23 23:58:57.247687: step: 384/77, loss: 0.03037024661898613
2023-01-23 23:58:58.545819: step: 388/77, loss: 7.473508958355524e-06
==================================================
Loss: 0.007
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 29}
Test Chinese: {'template': {'p': 0.9436619718309859, 'r': 0.5114503816793893, 'f1': 0.6633663366336633}, 'slot': {'p': 0.45161290322580644, 'r': 0.012646793134598013, 'f1': 0.024604569420035152}, 'combined': 0.016321843080617376, 'epoch': 29}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 29}
Test Korean: {'template': {'p': 0.9436619718309859, 'r': 0.5114503816793893, 'f1': 0.6633663366336633}, 'slot': {'p': 0.43333333333333335, 'r': 0.011743450767841012, 'f1': 0.022867194371152158}, 'combined': 0.015169326959081133, 'epoch': 29}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 29}
Test Russian: {'template': {'p': 0.9305555555555556, 'r': 0.5114503816793893, 'f1': 0.6600985221674877}, 'slot': {'p': 0.45161290322580644, 'r': 0.012646793134598013, 'f1': 0.024604569420035152}, 'combined': 0.016241439912732563, 'epoch': 29}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 29}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29}
Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3}
Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3}
Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4}
Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4}
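Note on the reported numbers: each score block lists precision/recall/F1 separately for templates and slots, and the 'combined' value is consistent with being the product of the two F1 scores (e.g. 0.7368421052631579 * 0.07029876977152899 ≈ 0.05179909351586346 for the Dev-for-Russian best result). The short Python sketch below reproduces that relationship from the logged precision/recall values; the function names are illustrative and not taken from train.py, and the product rule is an assumption inferred from the log rather than confirmed scoring code.

def f1(p: float, r: float) -> float:
    # Standard F1: harmonic mean of precision and recall (0.0 when both are 0).
    return 0.0 if p + r == 0.0 else 2 * p * r / (p + r)

def combined_score(template_p: float, template_r: float,
                   slot_p: float, slot_r: float) -> float:
    # Assumed relationship observed in the log: combined = template F1 * slot F1.
    return f1(template_p, template_r) * f1(slot_p, slot_r)

# Dev-for-Russian best result above: template p=1.0, r=0.5833..., slot p=0.5, r=0.0378...
# Prints ~0.05179909351586346, matching the logged 'combined' value.
print(combined_score(1.0, 0.5833333333333334, 0.5, 0.03780718336483932))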