Command that produces this log: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 ---------------------------------------------------------------------------------------------------- > trainable params: >>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 
1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> 
xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: 
torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> trans_rep.weight: torch.Size([1024, 2048]) >>> trans_rep.bias: torch.Size([1024]) >>> hidden_ffns.Corruplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Corruplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Cybercrimeplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Cybercrimeplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Disasterplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Disasterplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Displacementplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Displacementplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Epidemiplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Epidemiplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Etiplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Etiplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Protestplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Protestplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Terrorplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Terrorplate.layers.0.bias: torch.Size([768]) >>> template_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Etiplate.layers.0.weight: 
torch.Size([450, 768]) >>> template_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) >>> type_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Corruplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Corruplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Disasterplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Disasterplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Displacementplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Displacementplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Epidemiplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Epidemiplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Etiplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Etiplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Protestplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Protestplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Terrorplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Terrorplate.layers.1.bias: torch.Size([6]) >>> completion_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Corruplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Corruplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> 
completion_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Disasterplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Disasterplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Displacementplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Displacementplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Epidemiplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Epidemiplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Etiplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Etiplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Protestplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Protestplate.layers.1.bias: torch.Size([4]) >>> completion_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Terrorplate.layers.1.weight: torch.Size([4, 450]) >>> completion_classifiers.Terrorplate.layers.1.bias: torch.Size([4]) >>> overtime_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> 
overtime_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) n_trainable_params: 582185936, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 
--xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:27:49.340702: step: 4/77, loss: 1.0471444129943848 2023-01-23 22:27:50.635935: step: 8/77, loss: 1.0665420293807983 2023-01-23 22:27:51.886187: step: 12/77, loss: 1.0573136806488037 2023-01-23 22:27:53.174758: step: 16/77, loss: 1.0555137395858765 2023-01-23 22:27:54.469025: step: 20/77, loss: 1.0451674461364746 2023-01-23 22:27:55.785688: step: 24/77, loss: 1.049659252166748 2023-01-23 22:27:57.093839: step: 28/77, loss: 1.0589720010757446 2023-01-23 22:27:58.384918: step: 32/77, loss: 1.0422852039337158 2023-01-23 22:27:59.684346: step: 36/77, loss: 1.0360052585601807 2023-01-23 22:28:00.985109: step: 40/77, loss: 1.028542399406433 2023-01-23 22:28:02.248970: step: 44/77, loss: 1.021812915802002 2023-01-23 22:28:03.509200: step: 48/77, loss: 1.0086398124694824 2023-01-23 22:28:04.818445: step: 52/77, loss: 1.0082159042358398 2023-01-23 22:28:06.093898: step: 56/77, loss: 0.9977434873580933 2023-01-23 22:28:07.357730: step: 60/77, loss: 0.9916622638702393 2023-01-23 22:28:08.672997: step: 64/77, loss: 0.9763437509536743 2023-01-23 22:28:09.899028: step: 68/77, loss: 0.9689540863037109 2023-01-23 22:28:11.219943: step: 72/77, loss: 0.9549002647399902 2023-01-23 22:28:12.544707: step: 76/77, loss: 0.934126615524292 2023-01-23 22:28:13.858792: step: 80/77, loss: 0.9254927039146423 2023-01-23 22:28:15.114897: step: 84/77, loss: 0.9223530292510986 2023-01-23 22:28:16.427521: step: 88/77, loss: 0.8978661298751831 2023-01-23 22:28:17.735304: step: 92/77, loss: 0.8742111921310425 2023-01-23 22:28:19.052483: step: 96/77, loss: 0.8754185438156128 2023-01-23 22:28:20.363008: step: 100/77, loss: 0.8745602369308472 2023-01-23 22:28:21.672717: step: 104/77, loss: 0.8353378772735596 2023-01-23 22:28:22.982412: step: 108/77, loss: 0.8197598457336426 2023-01-23 22:28:24.305459: step: 112/77, loss: 0.8277198076248169 2023-01-23 22:28:25.522152: step: 116/77, loss: 0.7971259355545044 2023-01-23 22:28:26.828410: step: 120/77, loss: 0.7589684724807739 2023-01-23 22:28:28.143292: step: 124/77, loss: 0.7603246569633484 2023-01-23 22:28:29.446597: step: 128/77, loss: 0.7163886427879333 2023-01-23 22:28:30.810101: step: 132/77, loss: 0.7048380970954895 2023-01-23 22:28:32.124069: step: 136/77, loss: 0.690535306930542 2023-01-23 22:28:33.482400: step: 140/77, loss: 0.6806949973106384 2023-01-23 22:28:34.800681: step: 144/77, loss: 0.6635257005691528 2023-01-23 22:28:36.135495: step: 148/77, loss: 0.6138787269592285 2023-01-23 22:28:37.504683: step: 152/77, loss: 0.5696084499359131 2023-01-23 22:28:38.822323: step: 156/77, loss: 0.5784422755241394 2023-01-23 22:28:40.176765: step: 160/77, loss: 0.6332916021347046 2023-01-23 22:28:41.489781: step: 164/77, loss: 0.505962073802948 2023-01-23 22:28:42.846724: step: 168/77, loss: 0.5323714017868042 2023-01-23 22:28:44.167689: step: 172/77, loss: 0.4151668846607208 2023-01-23 22:28:45.463625: step: 176/77, loss: 0.40285319089889526 2023-01-23 22:28:46.766061: step: 180/77, loss: 0.39407557249069214 2023-01-23 22:28:48.046862: step: 184/77, loss: 0.4074208736419678 2023-01-23 22:28:49.445446: step: 188/77, loss: 0.4065112769603729 2023-01-23 22:28:50.705229: step: 192/77, loss: 0.37398186326026917 2023-01-23 22:28:51.984796: step: 196/77, loss: 0.3212049603462219 2023-01-23 22:28:53.275755: step: 200/77, loss: 0.30345451831817627 2023-01-23 22:28:54.606637: step: 204/77, loss: 0.4757692813873291 2023-01-23 22:28:55.881818: step: 
208/77, loss: 0.2263353168964386 2023-01-23 22:28:57.150671: step: 212/77, loss: 0.26267728209495544 2023-01-23 22:28:58.437665: step: 216/77, loss: 0.17210954427719116 2023-01-23 22:28:59.740472: step: 220/77, loss: 0.2977479100227356 2023-01-23 22:29:01.045757: step: 224/77, loss: 0.1835707575082779 2023-01-23 22:29:02.361674: step: 228/77, loss: 0.3345186412334442 2023-01-23 22:29:03.662573: step: 232/77, loss: 0.13545027375221252 2023-01-23 22:29:04.977149: step: 236/77, loss: 0.12607897818088531 2023-01-23 22:29:06.300883: step: 240/77, loss: 0.11827825009822845 2023-01-23 22:29:07.602908: step: 244/77, loss: 0.12457980215549469 2023-01-23 22:29:08.905818: step: 248/77, loss: 0.15995153784751892 2023-01-23 22:29:10.208213: step: 252/77, loss: 0.3346996009349823 2023-01-23 22:29:11.507341: step: 256/77, loss: 0.22116169333457947 2023-01-23 22:29:12.785079: step: 260/77, loss: 0.10209763050079346 2023-01-23 22:29:14.104859: step: 264/77, loss: 0.07830449193716049 2023-01-23 22:29:15.388460: step: 268/77, loss: 0.05993305519223213 2023-01-23 22:29:16.667124: step: 272/77, loss: 0.1856289505958557 2023-01-23 22:29:17.964196: step: 276/77, loss: 0.11337044090032578 2023-01-23 22:29:19.266955: step: 280/77, loss: 0.12034044414758682 2023-01-23 22:29:20.593175: step: 284/77, loss: 0.12572534382343292 2023-01-23 22:29:21.940605: step: 288/77, loss: 0.1536373645067215 2023-01-23 22:29:23.223141: step: 292/77, loss: 0.04774583876132965 2023-01-23 22:29:24.489952: step: 296/77, loss: 0.13813336193561554 2023-01-23 22:29:25.867294: step: 300/77, loss: 0.06406290829181671 2023-01-23 22:29:27.147936: step: 304/77, loss: 0.09071193635463715 2023-01-23 22:29:28.474505: step: 308/77, loss: 0.08882340788841248 2023-01-23 22:29:29.809807: step: 312/77, loss: 0.03451357036828995 2023-01-23 22:29:31.053477: step: 316/77, loss: 0.05280934274196625 2023-01-23 22:29:32.386870: step: 320/77, loss: 0.0957951694726944 2023-01-23 22:29:33.734712: step: 324/77, loss: 0.09612944722175598 2023-01-23 22:29:35.011107: step: 328/77, loss: 0.09476247429847717 2023-01-23 22:29:36.285493: step: 332/77, loss: 0.08481978625059128 2023-01-23 22:29:37.517620: step: 336/77, loss: 0.39117416739463806 2023-01-23 22:29:38.858970: step: 340/77, loss: 0.14474590122699738 2023-01-23 22:29:40.144763: step: 344/77, loss: 0.10986341536045074 2023-01-23 22:29:41.475221: step: 348/77, loss: 0.03673511743545532 2023-01-23 22:29:42.772488: step: 352/77, loss: 0.0850902646780014 2023-01-23 22:29:44.101890: step: 356/77, loss: 0.09250978380441666 2023-01-23 22:29:45.430230: step: 360/77, loss: 0.3991732597351074 2023-01-23 22:29:46.760007: step: 364/77, loss: 0.13359756767749786 2023-01-23 22:29:48.067033: step: 368/77, loss: 0.20494695007801056 2023-01-23 22:29:49.354425: step: 372/77, loss: 0.08006304502487183 2023-01-23 22:29:50.701078: step: 376/77, loss: 0.0816473588347435 2023-01-23 22:29:52.005344: step: 380/77, loss: 0.12702281773090363 2023-01-23 22:29:53.317051: step: 384/77, loss: 0.05596127733588219 2023-01-23 22:29:54.703007: step: 388/77, loss: 0.12373457849025726 ================================================== Loss: 0.479 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test Chinese: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} 
Test Korean: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test Russian: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Chinese: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:31:51.549911: step: 4/77, loss: 0.21097904443740845 2023-01-23 22:31:52.859042: step: 8/77, loss: 0.12158848345279694 2023-01-23 22:31:54.188236: step: 12/77, loss: 0.08410963416099548 2023-01-23 22:31:55.445544: step: 16/77, loss: 0.09124782681465149 2023-01-23 22:31:56.751887: step: 20/77, loss: 0.08149276673793793 2023-01-23 22:31:58.035474: step: 24/77, loss: 0.047875721007585526 2023-01-23 22:31:59.341022: step: 28/77, loss: 0.07582291960716248 2023-01-23 22:32:00.649455: step: 32/77, loss: 0.05457703024148941 2023-01-23 22:32:01.935449: step: 36/77, loss: 0.05679886043071747 2023-01-23 22:32:03.233774: step: 40/77, loss: 0.28963497281074524 2023-01-23 22:32:04.577513: step: 44/77, loss: 0.04498041421175003 2023-01-23 22:32:05.859519: step: 48/77, loss: 0.07296052575111389 2023-01-23 22:32:07.164364: step: 52/77, loss: 0.07825222611427307 2023-01-23 22:32:08.413626: step: 56/77, loss: 0.07049486041069031 2023-01-23 22:32:09.715475: step: 60/77, loss: 0.05149099975824356 2023-01-23 22:32:11.028954: step: 64/77, loss: 0.10428111255168915 2023-01-23 22:32:12.347786: step: 68/77, loss: 
0.15299206972122192 2023-01-23 22:32:13.631497: step: 72/77, loss: 0.07444358617067337 2023-01-23 22:32:14.948115: step: 76/77, loss: 0.10790640115737915 2023-01-23 22:32:16.223186: step: 80/77, loss: 0.08597603440284729 2023-01-23 22:32:17.564345: step: 84/77, loss: 0.12119434773921967 2023-01-23 22:32:18.909442: step: 88/77, loss: 0.08092857897281647 2023-01-23 22:32:20.256097: step: 92/77, loss: 0.3239857852458954 2023-01-23 22:32:21.560031: step: 96/77, loss: 0.07001736015081406 2023-01-23 22:32:22.872932: step: 100/77, loss: 0.06111065298318863 2023-01-23 22:32:24.149597: step: 104/77, loss: 0.09189890325069427 2023-01-23 22:32:25.432323: step: 108/77, loss: 0.05465451627969742 2023-01-23 22:32:26.772095: step: 112/77, loss: 0.03947276249527931 2023-01-23 22:32:28.060788: step: 116/77, loss: 0.0742819756269455 2023-01-23 22:32:29.347954: step: 120/77, loss: 0.0826253667473793 2023-01-23 22:32:30.670087: step: 124/77, loss: 0.1338333636522293 2023-01-23 22:32:31.969535: step: 128/77, loss: 0.07840230315923691 2023-01-23 22:32:33.300606: step: 132/77, loss: 0.13976845145225525 2023-01-23 22:32:34.622180: step: 136/77, loss: 0.06765273958444595 2023-01-23 22:32:35.957567: step: 140/77, loss: 0.27704665064811707 2023-01-23 22:32:37.276325: step: 144/77, loss: 0.06916029751300812 2023-01-23 22:32:38.576580: step: 148/77, loss: 0.1273961067199707 2023-01-23 22:32:39.901555: step: 152/77, loss: 0.10133853554725647 2023-01-23 22:32:41.186823: step: 156/77, loss: 0.13052284717559814 2023-01-23 22:32:42.500631: step: 160/77, loss: 0.14147736132144928 2023-01-23 22:32:43.792910: step: 164/77, loss: 0.11222036182880402 2023-01-23 22:32:45.113965: step: 168/77, loss: 0.053240492939949036 2023-01-23 22:32:46.491594: step: 172/77, loss: 0.11585910618305206 2023-01-23 22:32:47.824721: step: 176/77, loss: 0.03825229033827782 2023-01-23 22:32:49.137032: step: 180/77, loss: 0.07558736205101013 2023-01-23 22:32:50.417590: step: 184/77, loss: 0.10447107255458832 2023-01-23 22:32:51.783743: step: 188/77, loss: 0.09023972600698471 2023-01-23 22:32:53.119860: step: 192/77, loss: 0.09343035519123077 2023-01-23 22:32:54.451863: step: 196/77, loss: 0.17641930282115936 2023-01-23 22:32:55.752251: step: 200/77, loss: 0.13928231596946716 2023-01-23 22:32:57.051304: step: 204/77, loss: 0.05740395188331604 2023-01-23 22:32:58.385758: step: 208/77, loss: 0.12304575741291046 2023-01-23 22:32:59.710524: step: 212/77, loss: 0.12332822382450104 2023-01-23 22:33:01.027973: step: 216/77, loss: 0.05503353476524353 2023-01-23 22:33:02.321047: step: 220/77, loss: 0.05017006769776344 2023-01-23 22:33:03.589303: step: 224/77, loss: 0.09479832649230957 2023-01-23 22:33:04.908771: step: 228/77, loss: 0.058456674218177795 2023-01-23 22:33:06.214467: step: 232/77, loss: 0.042720895260572433 2023-01-23 22:33:07.508805: step: 236/77, loss: 0.06012682616710663 2023-01-23 22:33:08.775368: step: 240/77, loss: 0.10036720335483551 2023-01-23 22:33:10.090272: step: 244/77, loss: 0.0868229866027832 2023-01-23 22:33:11.418460: step: 248/77, loss: 0.11549285054206848 2023-01-23 22:33:12.753693: step: 252/77, loss: 0.1314641237258911 2023-01-23 22:33:14.030938: step: 256/77, loss: 0.14948371052742004 2023-01-23 22:33:15.342714: step: 260/77, loss: 0.052075546234846115 2023-01-23 22:33:16.623338: step: 264/77, loss: 0.1409543752670288 2023-01-23 22:33:17.937080: step: 268/77, loss: 0.1372973918914795 2023-01-23 22:33:19.247749: step: 272/77, loss: 0.051039919257164 2023-01-23 22:33:20.525672: step: 276/77, loss: 0.10971783846616745 2023-01-23 
22:33:21.782376: step: 280/77, loss: 0.03424395993351936 2023-01-23 22:33:23.068908: step: 284/77, loss: 0.06090284138917923 2023-01-23 22:33:24.433176: step: 288/77, loss: 0.03272155672311783 2023-01-23 22:33:25.721386: step: 292/77, loss: 0.07862883806228638 2023-01-23 22:33:27.025836: step: 296/77, loss: 0.030069496482610703 2023-01-23 22:33:28.376683: step: 300/77, loss: 0.20931200683116913 2023-01-23 22:33:29.726143: step: 304/77, loss: 0.2595861554145813 2023-01-23 22:33:31.056303: step: 308/77, loss: 0.082735575735569 2023-01-23 22:33:32.402014: step: 312/77, loss: 0.08077457547187805 2023-01-23 22:33:33.684270: step: 316/77, loss: 0.07214593142271042 2023-01-23 22:33:34.997490: step: 320/77, loss: 0.07391369342803955 2023-01-23 22:33:36.288744: step: 324/77, loss: 0.25721216201782227 2023-01-23 22:33:37.573177: step: 328/77, loss: 0.1590745449066162 2023-01-23 22:33:38.839623: step: 332/77, loss: 0.08248879760503769 2023-01-23 22:33:40.164554: step: 336/77, loss: 0.11362461745738983 2023-01-23 22:33:41.446476: step: 340/77, loss: 0.024861745536327362 2023-01-23 22:33:42.793534: step: 344/77, loss: 0.21572968363761902 2023-01-23 22:33:44.078751: step: 348/77, loss: 0.09827074408531189 2023-01-23 22:33:45.405042: step: 352/77, loss: 0.05567440763115883 2023-01-23 22:33:46.724362: step: 356/77, loss: 0.02224394679069519 2023-01-23 22:33:48.034116: step: 360/77, loss: 0.07356810569763184 2023-01-23 22:33:49.338078: step: 364/77, loss: 0.11073225736618042 2023-01-23 22:33:50.648547: step: 368/77, loss: 0.0851237028837204 2023-01-23 22:33:51.954535: step: 372/77, loss: 0.032852984964847565 2023-01-23 22:33:53.284776: step: 376/77, loss: 0.12542441487312317 2023-01-23 22:33:54.570905: step: 380/77, loss: 0.08235940337181091 2023-01-23 22:33:55.884905: step: 384/77, loss: 0.08265835046768188 2023-01-23 22:33:57.232170: step: 388/77, loss: 0.12302671372890472 ================================================== Loss: 0.100 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Chinese: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Korean: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Russian: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Chinese: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 
0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 2 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:35:37.433600: step: 4/77, loss: 0.04768185317516327 2023-01-23 22:35:38.716981: step: 8/77, loss: 0.07104780524969101 2023-01-23 22:35:40.052602: step: 12/77, loss: 0.19291284680366516 2023-01-23 22:35:41.304167: step: 16/77, loss: 0.0543084442615509 2023-01-23 22:35:42.591920: step: 20/77, loss: 0.1463380753993988 2023-01-23 22:35:43.944599: step: 24/77, loss: 0.08262240886688232 2023-01-23 22:35:45.213684: step: 28/77, loss: 0.04590801149606705 2023-01-23 22:35:46.515458: step: 32/77, loss: 0.1446647346019745 2023-01-23 22:35:47.847388: step: 36/77, loss: 0.15891560912132263 2023-01-23 22:35:49.123262: step: 40/77, loss: 0.11249516904354095 2023-01-23 22:35:50.409277: step: 44/77, loss: 0.07387419044971466 2023-01-23 22:35:51.694376: step: 48/77, loss: 0.07768457382917404 2023-01-23 22:35:53.013019: step: 52/77, loss: 0.05759643018245697 2023-01-23 22:35:54.277480: step: 56/77, loss: 0.3014858663082123 2023-01-23 22:35:55.609746: step: 60/77, loss: 0.023099102079868317 2023-01-23 22:35:56.927485: step: 64/77, loss: 0.054274268448352814 2023-01-23 22:35:58.187068: step: 68/77, loss: 0.04140179604291916 2023-01-23 22:35:59.466211: step: 72/77, loss: 0.0799446851015091 2023-01-23 22:36:00.774916: step: 76/77, loss: 0.07328343391418457 2023-01-23 22:36:02.119122: step: 80/77, loss: 0.19326601922512054 2023-01-23 22:36:03.418997: step: 84/77, loss: 0.15116506814956665 2023-01-23 22:36:04.760294: step: 88/77, loss: 0.12275572866201401 2023-01-23 22:36:06.033728: step: 92/77, loss: 0.07996051758527756 2023-01-23 22:36:07.367716: step: 96/77, loss: 0.04166724160313606 2023-01-23 22:36:08.639263: step: 100/77, loss: 0.2334735244512558 2023-01-23 22:36:09.961486: step: 104/77, loss: 0.05039349943399429 2023-01-23 22:36:11.251827: step: 108/77, loss: 0.104027658700943 2023-01-23 22:36:12.571474: step: 112/77, loss: 0.11883699893951416 2023-01-23 22:36:13.886974: step: 116/77, loss: 0.16714473068714142 2023-01-23 22:36:15.192363: step: 120/77, loss: 0.16093513369560242 2023-01-23 22:36:16.470999: step: 124/77, loss: 0.08430910110473633 2023-01-23 22:36:17.753717: step: 128/77, loss: 0.09487202763557434 2023-01-23 22:36:19.046046: step: 132/77, loss: 0.11969651281833649 2023-01-23 22:36:20.397055: step: 136/77, loss: 0.12813276052474976 2023-01-23 22:36:21.695127: step: 140/77, loss: 0.10447872430086136 2023-01-23 22:36:23.040726: step: 144/77, 
loss: 0.08772681653499603 2023-01-23 22:36:24.369658: step: 148/77, loss: 0.10942377150058746 2023-01-23 22:36:25.660740: step: 152/77, loss: 0.03597911819815636 2023-01-23 22:36:26.917610: step: 156/77, loss: 0.05257668346166611 2023-01-23 22:36:28.178033: step: 160/77, loss: 0.048265159130096436 2023-01-23 22:36:29.472290: step: 164/77, loss: 0.07379506528377533 2023-01-23 22:36:30.763039: step: 168/77, loss: 0.04585869237780571 2023-01-23 22:36:32.041528: step: 172/77, loss: 0.04158536717295647 2023-01-23 22:36:33.332846: step: 176/77, loss: 0.2913612425327301 2023-01-23 22:36:34.690842: step: 180/77, loss: 0.04088529944419861 2023-01-23 22:36:35.958754: step: 184/77, loss: 0.08837796747684479 2023-01-23 22:36:37.237487: step: 188/77, loss: 0.038671888411045074 2023-01-23 22:36:38.552261: step: 192/77, loss: 0.04168698936700821 2023-01-23 22:36:39.867511: step: 196/77, loss: 0.04485369473695755 2023-01-23 22:36:41.139392: step: 200/77, loss: 0.18418560922145844 2023-01-23 22:36:42.459404: step: 204/77, loss: 0.07247032225131989 2023-01-23 22:36:43.758756: step: 208/77, loss: 0.04573575779795647 2023-01-23 22:36:45.028480: step: 212/77, loss: 0.04544593393802643 2023-01-23 22:36:46.320887: step: 216/77, loss: 0.07713460922241211 2023-01-23 22:36:47.645495: step: 220/77, loss: 0.10834340751171112 2023-01-23 22:36:48.927641: step: 224/77, loss: 0.049649372696876526 2023-01-23 22:36:50.219024: step: 228/77, loss: 0.06424231082201004 2023-01-23 22:36:51.509571: step: 232/77, loss: 0.0655415877699852 2023-01-23 22:36:52.874614: step: 236/77, loss: 0.12457633018493652 2023-01-23 22:36:54.170329: step: 240/77, loss: 0.09666450321674347 2023-01-23 22:36:55.488887: step: 244/77, loss: 0.045510344207286835 2023-01-23 22:36:56.805177: step: 248/77, loss: 0.013221305795013905 2023-01-23 22:36:58.139129: step: 252/77, loss: 0.01512373797595501 2023-01-23 22:36:59.447130: step: 256/77, loss: 0.03427667170763016 2023-01-23 22:37:00.795753: step: 260/77, loss: 0.015169290825724602 2023-01-23 22:37:02.118199: step: 264/77, loss: 0.028215918689966202 2023-01-23 22:37:03.401742: step: 268/77, loss: 0.026102934032678604 2023-01-23 22:37:04.745372: step: 272/77, loss: 0.014348288998007774 2023-01-23 22:37:06.009480: step: 276/77, loss: 0.020344989374279976 2023-01-23 22:37:07.333214: step: 280/77, loss: 0.13085316121578217 2023-01-23 22:37:08.661138: step: 284/77, loss: 0.0704241618514061 2023-01-23 22:37:10.002237: step: 288/77, loss: 0.07423460483551025 2023-01-23 22:37:11.279062: step: 292/77, loss: 0.028306419029831886 2023-01-23 22:37:12.606706: step: 296/77, loss: 0.06515046209096909 2023-01-23 22:37:13.934489: step: 300/77, loss: 0.3677542209625244 2023-01-23 22:37:15.194952: step: 304/77, loss: 0.04421716928482056 2023-01-23 22:37:16.479313: step: 308/77, loss: 0.009226376190781593 2023-01-23 22:37:17.813030: step: 312/77, loss: 0.018259337171912193 2023-01-23 22:37:19.109915: step: 316/77, loss: 0.05614681541919708 2023-01-23 22:37:20.472492: step: 320/77, loss: 0.019507668912410736 2023-01-23 22:37:21.770033: step: 324/77, loss: 0.06723038107156754 2023-01-23 22:37:23.043502: step: 328/77, loss: 0.057566963136196136 2023-01-23 22:37:24.360472: step: 332/77, loss: 0.02253660187125206 2023-01-23 22:37:25.689506: step: 336/77, loss: 0.024903327226638794 2023-01-23 22:37:27.051910: step: 340/77, loss: 0.04696238785982132 2023-01-23 22:37:28.408627: step: 344/77, loss: 0.10337799787521362 2023-01-23 22:37:29.700563: step: 348/77, loss: 0.10140752792358398 2023-01-23 22:37:30.986562: step: 352/77, loss: 
0.08285317569971085 2023-01-23 22:37:32.275240: step: 356/77, loss: 0.1097133457660675 2023-01-23 22:37:33.575983: step: 360/77, loss: 0.04434497654438019 2023-01-23 22:37:34.882077: step: 364/77, loss: 0.0060016559436917305 2023-01-23 22:37:36.162347: step: 368/77, loss: 0.010138597339391708 2023-01-23 22:37:37.541588: step: 372/77, loss: 0.024792088195681572 2023-01-23 22:37:38.807326: step: 376/77, loss: 0.19264760613441467 2023-01-23 22:37:40.096213: step: 380/77, loss: 0.03846864402294159 2023-01-23 22:37:41.350509: step: 384/77, loss: 0.01814502663910389 2023-01-23 22:37:42.686415: step: 388/77, loss: 0.01226385124027729 ================================================== Loss: 0.081 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} New best chinese model... New best korean model... New best russian model... 
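--------------------
How the evaluation numbers above fit together (a minimal sketch, not the repository's training or scoring code; the helper names below are illustrative, and the relationships are inferred only from the logged values, e.g. Dev Chinese at epoch 2 has template f1 = 0.7368421052631579, slot f1 = 0.07029876977152899, and combined = 0.05179909351586346 = template_f1 * slot_f1):

def f1(p: float, r: float) -> float:
    """Standard F1 from precision and recall; 0.0 when both are zero."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0


def combined_score(result: dict) -> float:
    """'combined' in the log equals template F1 times slot F1 (inferred from the numbers)."""
    return result["template"]["f1"] * result["slot"]["f1"]


def is_new_best(dev_result: dict, best_so_far: float) -> bool:
    """'New best <lang> model...' appears to fire only on a strict improvement
    of the dev 'combined' score; epochs that merely tie the best are not saved."""
    return combined_score(dev_result) > best_so_far


if __name__ == "__main__":
    # Dev Chinese, epoch 2, copied from the log above.
    dev_chinese_ep2 = {
        "template": {"p": 1.0, "r": 0.5833333333333334, "f1": 0.7368421052631579},
        "slot": {"p": 0.5, "r": 0.03780718336483932, "f1": 0.07029876977152899},
    }
    print(f1(1.0, 0.5833333333333334))        # ~0.7368421052631579
    print(combined_score(dev_chinese_ep2))    # ~0.05179909351586346
    print(is_new_best(dev_chinese_ep2, 0.0))  # True -> "New best chinese model..."

If this reading is right, it also explains the rest of this section: in the later epochs the dev 'combined' score never strictly exceeds the epoch-2 value, so no further "New best ... model" notices appear and the "Current best result" block stays pinned at epoch 2.
--------------------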
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:39:43.679865: step: 4/77, loss: 0.008898122236132622 2023-01-23 22:39:44.962726: step: 8/77, loss: 0.07566316425800323 2023-01-23 22:39:46.309038: step: 12/77, loss: 0.02485734410583973 2023-01-23 22:39:47.626762: step: 16/77, loss: 0.04478280991315842 2023-01-23 22:39:48.923517: step: 20/77, loss: 0.11990983784198761 2023-01-23 22:39:50.243197: step: 24/77, loss: 0.007120449561625719 2023-01-23 22:39:51.525682: step: 28/77, loss: 0.060444124042987823 2023-01-23 22:39:52.809849: step: 32/77, loss: 0.049005698412656784 2023-01-23 22:39:54.108906: step: 36/77, loss: 0.07884863018989563 2023-01-23 22:39:55.433680: step: 40/77, loss: 0.015905214473605156 2023-01-23 22:39:56.727806: step: 44/77, loss: 0.041278135031461716 2023-01-23 22:39:58.037239: step: 48/77, loss: 0.01994415372610092 2023-01-23 22:39:59.371877: step: 52/77, loss: 0.03183678537607193 2023-01-23 22:40:00.695704: step: 56/77, loss: 0.011753171682357788 2023-01-23 22:40:02.092154: step: 60/77, loss: 0.11609509587287903 2023-01-23 22:40:03.329165: step: 64/77, loss: 0.024422401562333107 2023-01-23 22:40:04.677536: step: 68/77, loss: 0.039935797452926636 2023-01-23 22:40:05.958539: step: 72/77, loss: 0.02634143829345703 2023-01-23 22:40:07.261999: step: 76/77, loss: 0.012217414565384388 2023-01-23 22:40:08.596903: step: 80/77, loss: 
0.0206155888736248 2023-01-23 22:40:09.907291: step: 84/77, loss: 0.042646557092666626 2023-01-23 22:40:11.232381: step: 88/77, loss: 0.008508237078785896 2023-01-23 22:40:12.522112: step: 92/77, loss: 0.02563760057091713 2023-01-23 22:40:13.818538: step: 96/77, loss: 0.10512962937355042 2023-01-23 22:40:15.139472: step: 100/77, loss: 0.10669828951358795 2023-01-23 22:40:16.465612: step: 104/77, loss: 0.03299158066511154 2023-01-23 22:40:17.822296: step: 108/77, loss: 0.06534292548894882 2023-01-23 22:40:19.125067: step: 112/77, loss: 0.051838312298059464 2023-01-23 22:40:20.463440: step: 116/77, loss: 0.01953636109828949 2023-01-23 22:40:21.753122: step: 120/77, loss: 0.04791083186864853 2023-01-23 22:40:23.037962: step: 124/77, loss: 0.0310364942997694 2023-01-23 22:40:24.326153: step: 128/77, loss: 0.019350484013557434 2023-01-23 22:40:25.710688: step: 132/77, loss: 0.13583087921142578 2023-01-23 22:40:26.970898: step: 136/77, loss: 0.002822377486154437 2023-01-23 22:40:28.324371: step: 140/77, loss: 0.030841834843158722 2023-01-23 22:40:29.641783: step: 144/77, loss: 0.035415925085544586 2023-01-23 22:40:30.928259: step: 148/77, loss: 0.1077069491147995 2023-01-23 22:40:32.216640: step: 152/77, loss: 0.016785571351647377 2023-01-23 22:40:33.513978: step: 156/77, loss: 0.026634112000465393 2023-01-23 22:40:34.872078: step: 160/77, loss: 0.030998708680272102 2023-01-23 22:40:36.210649: step: 164/77, loss: 0.037455003708601 2023-01-23 22:40:37.520352: step: 168/77, loss: 0.03089234046638012 2023-01-23 22:40:38.782714: step: 172/77, loss: 0.010667935013771057 2023-01-23 22:40:40.062870: step: 176/77, loss: 0.0074789999052882195 2023-01-23 22:40:41.311353: step: 180/77, loss: 0.020553266629576683 2023-01-23 22:40:42.603574: step: 184/77, loss: 0.036195676773786545 2023-01-23 22:40:43.884799: step: 188/77, loss: 0.053434185683727264 2023-01-23 22:40:45.233817: step: 192/77, loss: 0.04257701337337494 2023-01-23 22:40:46.567905: step: 196/77, loss: 0.0338403582572937 2023-01-23 22:40:47.905730: step: 200/77, loss: 0.011131498962640762 2023-01-23 22:40:49.256803: step: 204/77, loss: 0.29350656270980835 2023-01-23 22:40:50.540276: step: 208/77, loss: 0.014081919565796852 2023-01-23 22:40:51.845624: step: 212/77, loss: 0.021087724715471268 2023-01-23 22:40:53.135609: step: 216/77, loss: 0.034299690276384354 2023-01-23 22:40:54.425728: step: 220/77, loss: 0.09156246483325958 2023-01-23 22:40:55.722630: step: 224/77, loss: 0.04639114439487457 2023-01-23 22:40:57.039293: step: 228/77, loss: 0.046765901148319244 2023-01-23 22:40:58.380718: step: 232/77, loss: 0.009462382644414902 2023-01-23 22:40:59.712110: step: 236/77, loss: 0.026388362050056458 2023-01-23 22:41:01.015302: step: 240/77, loss: 0.005971093196421862 2023-01-23 22:41:02.344611: step: 244/77, loss: 0.006199051160365343 2023-01-23 22:41:03.658877: step: 248/77, loss: 0.02215702459216118 2023-01-23 22:41:04.961203: step: 252/77, loss: 0.11260189116001129 2023-01-23 22:41:06.249594: step: 256/77, loss: 0.0063177552074193954 2023-01-23 22:41:07.560024: step: 260/77, loss: 0.01616254635155201 2023-01-23 22:41:08.911072: step: 264/77, loss: 0.01467475201934576 2023-01-23 22:41:10.233038: step: 268/77, loss: 0.06858555227518082 2023-01-23 22:41:11.530988: step: 272/77, loss: 0.057683344930410385 2023-01-23 22:41:12.843838: step: 276/77, loss: 0.033667661249637604 2023-01-23 22:41:14.143143: step: 280/77, loss: 0.002162193413823843 2023-01-23 22:41:15.475202: step: 284/77, loss: 0.11145009100437164 2023-01-23 22:41:16.748301: step: 288/77, 
loss: 0.07552994787693024 2023-01-23 22:41:18.054585: step: 292/77, loss: 0.045220375061035156 2023-01-23 22:41:19.376105: step: 296/77, loss: 0.02754788286983967 2023-01-23 22:41:20.680371: step: 300/77, loss: 0.026420462876558304 2023-01-23 22:41:21.965330: step: 304/77, loss: 0.015606552362442017 2023-01-23 22:41:23.301906: step: 308/77, loss: 0.019723594188690186 2023-01-23 22:41:24.567050: step: 312/77, loss: 0.03442617505788803 2023-01-23 22:41:25.870675: step: 316/77, loss: 0.07348484545946121 2023-01-23 22:41:27.175401: step: 320/77, loss: 0.020555390045046806 2023-01-23 22:41:28.463292: step: 324/77, loss: 0.018619626760482788 2023-01-23 22:41:29.783621: step: 328/77, loss: 0.006300671026110649 2023-01-23 22:41:31.067523: step: 332/77, loss: 0.012363612651824951 2023-01-23 22:41:32.382067: step: 336/77, loss: 0.03483050316572189 2023-01-23 22:41:33.701160: step: 340/77, loss: 0.012440014630556107 2023-01-23 22:41:35.035442: step: 344/77, loss: 0.0694480910897255 2023-01-23 22:41:36.376644: step: 348/77, loss: 0.03845695033669472 2023-01-23 22:41:37.655392: step: 352/77, loss: 0.00946133490651846 2023-01-23 22:41:38.964598: step: 356/77, loss: 0.0837019756436348 2023-01-23 22:41:40.255946: step: 360/77, loss: 0.09910577535629272 2023-01-23 22:41:41.584872: step: 364/77, loss: 0.015134901739656925 2023-01-23 22:41:42.947889: step: 368/77, loss: 0.06312797218561172 2023-01-23 22:41:44.289036: step: 372/77, loss: 0.01796662248671055 2023-01-23 22:41:45.554015: step: 376/77, loss: 0.013086151331663132 2023-01-23 22:41:46.821764: step: 380/77, loss: 0.03351001441478729 2023-01-23 22:41:48.176524: step: 384/77, loss: 0.0543978177011013 2023-01-23 22:41:49.492582: step: 388/77, loss: 0.014590279199182987 ================================================== Loss: 0.042 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 3} Test Chinese: {'template': {'p': 0.9436619718309859, 'r': 0.5317460317460317, 'f1': 0.6802030456852791}, 'slot': {'p': 0.7419354838709677, 'r': 0.019759450171821305, 'f1': 0.038493723849372385}, 'combined': 0.026183548202111162, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 3} Test Korean: {'template': {'p': 0.9444444444444444, 'r': 0.5396825396825397, 'f1': 0.6868686868686867}, 'slot': {'p': 0.7272727272727273, 'r': 0.020618556701030927, 'f1': 0.040100250626566414}, 'combined': 0.027543606490974905, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 3} Test Russian: {'template': {'p': 0.9436619718309859, 'r': 0.5317460317460317, 'f1': 0.6802030456852791}, 'slot': {'p': 0.7419354838709677, 'r': 0.019759450171821305, 'f1': 0.038493723849372385}, 'combined': 0.026183548202111162, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 
0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 4 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:43:29.844403: step: 4/77, loss: 0.023417659103870392 2023-01-23 22:43:31.139150: step: 8/77, loss: 0.0196953397244215 2023-01-23 22:43:32.406554: step: 12/77, loss: 0.010604934766888618 2023-01-23 22:43:33.708877: step: 16/77, loss: 0.026274144649505615 2023-01-23 22:43:35.000812: step: 20/77, loss: 0.0014932897174730897 2023-01-23 22:43:36.361726: step: 24/77, loss: 0.025961250066757202 2023-01-23 22:43:37.696034: step: 28/77, loss: 0.042515743523836136 2023-01-23 22:43:38.958791: step: 32/77, loss: 0.010017447173595428 2023-01-23 22:43:40.254136: step: 36/77, loss: 0.022811995819211006 2023-01-23 22:43:41.559532: step: 40/77, loss: 0.03783687949180603 2023-01-23 22:43:42.877572: step: 44/77, loss: 0.025669317692518234 2023-01-23 22:43:44.166425: step: 48/77, loss: 0.012726683169603348 2023-01-23 22:43:45.444258: step: 52/77, loss: 0.09254588186740875 2023-01-23 22:43:46.696278: step: 56/77, loss: 0.03728096932172775 2023-01-23 22:43:47.988516: step: 60/77, loss: 0.02287282608449459 2023-01-23 22:43:49.258706: step: 64/77, loss: 0.021858546882867813 2023-01-23 22:43:50.509226: step: 68/77, loss: 0.010480173863470554 2023-01-23 22:43:51.833594: step: 72/77, loss: 
0.11430490016937256 2023-01-23 22:43:53.101845: step: 76/77, loss: 0.0045967064797878265 2023-01-23 22:43:54.415397: step: 80/77, loss: 0.04011622816324234 2023-01-23 22:43:55.713758: step: 84/77, loss: 0.06810526549816132 2023-01-23 22:43:56.987862: step: 88/77, loss: 0.0022294847294688225 2023-01-23 22:43:58.298579: step: 92/77, loss: 0.015196477994322777 2023-01-23 22:43:59.574230: step: 96/77, loss: 0.02204442210495472 2023-01-23 22:44:00.897975: step: 100/77, loss: 0.051844045519828796 2023-01-23 22:44:02.212803: step: 104/77, loss: 0.005850006360560656 2023-01-23 22:44:03.566379: step: 108/77, loss: 0.053736791014671326 2023-01-23 22:44:04.849527: step: 112/77, loss: 0.04627533629536629 2023-01-23 22:44:06.147998: step: 116/77, loss: 0.00092123361537233 2023-01-23 22:44:07.446700: step: 120/77, loss: 0.039544571191072464 2023-01-23 22:44:08.790231: step: 124/77, loss: 0.04642752930521965 2023-01-23 22:44:10.096384: step: 128/77, loss: 0.11437688767910004 2023-01-23 22:44:11.381274: step: 132/77, loss: 0.007598129101097584 2023-01-23 22:44:12.706196: step: 136/77, loss: 0.009800883010029793 2023-01-23 22:44:13.984367: step: 140/77, loss: 0.010883791372179985 2023-01-23 22:44:15.297851: step: 144/77, loss: 0.1618647277355194 2023-01-23 22:44:16.602806: step: 148/77, loss: 0.03676885738968849 2023-01-23 22:44:17.957566: step: 152/77, loss: 0.011590557172894478 2023-01-23 22:44:19.248133: step: 156/77, loss: 0.02291189506649971 2023-01-23 22:44:20.612741: step: 160/77, loss: 0.003542313352227211 2023-01-23 22:44:21.896079: step: 164/77, loss: 0.0626843124628067 2023-01-23 22:44:23.212414: step: 168/77, loss: 0.0027569583617150784 2023-01-23 22:44:24.531728: step: 172/77, loss: 0.017458012327551842 2023-01-23 22:44:25.866796: step: 176/77, loss: 0.030079776421189308 2023-01-23 22:44:27.132539: step: 180/77, loss: 0.008054882287979126 2023-01-23 22:44:28.385932: step: 184/77, loss: 0.0851554423570633 2023-01-23 22:44:29.731115: step: 188/77, loss: 0.04232963174581528 2023-01-23 22:44:31.029976: step: 192/77, loss: 0.029516037553548813 2023-01-23 22:44:32.382676: step: 196/77, loss: 0.018004145473241806 2023-01-23 22:44:33.701927: step: 200/77, loss: 0.02017074078321457 2023-01-23 22:44:35.005707: step: 204/77, loss: 0.017187729477882385 2023-01-23 22:44:36.340576: step: 208/77, loss: 0.021137960255146027 2023-01-23 22:44:37.635007: step: 212/77, loss: 0.010061761364340782 2023-01-23 22:44:38.962861: step: 216/77, loss: 0.03293940797448158 2023-01-23 22:44:40.265810: step: 220/77, loss: 0.028204970061779022 2023-01-23 22:44:41.591159: step: 224/77, loss: 0.03787591680884361 2023-01-23 22:44:42.883406: step: 228/77, loss: 0.007750194985419512 2023-01-23 22:44:44.168730: step: 232/77, loss: 0.11326970160007477 2023-01-23 22:44:45.462561: step: 236/77, loss: 0.0397779680788517 2023-01-23 22:44:46.793225: step: 240/77, loss: 0.009334595873951912 2023-01-23 22:44:48.095054: step: 244/77, loss: 0.007776356302201748 2023-01-23 22:44:49.405341: step: 248/77, loss: 0.017981214448809624 2023-01-23 22:44:50.674617: step: 252/77, loss: 0.042225658893585205 2023-01-23 22:44:52.012208: step: 256/77, loss: 0.021111395210027695 2023-01-23 22:44:53.305107: step: 260/77, loss: 0.09040164202451706 2023-01-23 22:44:54.643763: step: 264/77, loss: 0.13206541538238525 2023-01-23 22:44:55.962116: step: 268/77, loss: 0.004779032897204161 2023-01-23 22:44:57.241002: step: 272/77, loss: 0.088816799223423 2023-01-23 22:44:58.528266: step: 276/77, loss: 0.02132064662873745 2023-01-23 22:44:59.831060: step: 280/77, 
loss: 0.06475003808736801 2023-01-23 22:45:01.113318: step: 284/77, loss: 0.0050977421924471855 2023-01-23 22:45:02.437962: step: 288/77, loss: 0.014675735495984554 2023-01-23 22:45:03.717436: step: 292/77, loss: 0.06635600328445435 2023-01-23 22:45:05.005352: step: 296/77, loss: 0.03008580021560192 2023-01-23 22:45:06.308165: step: 300/77, loss: 0.006084037013351917 2023-01-23 22:45:07.599830: step: 304/77, loss: 0.05068189650774002 2023-01-23 22:45:08.920611: step: 308/77, loss: 0.07237912714481354 2023-01-23 22:45:10.205893: step: 312/77, loss: 0.02533440850675106 2023-01-23 22:45:11.472009: step: 316/77, loss: 0.05642259865999222 2023-01-23 22:45:12.770229: step: 320/77, loss: 0.07874306291341782 2023-01-23 22:45:14.095025: step: 324/77, loss: 0.0407525859773159 2023-01-23 22:45:15.413151: step: 328/77, loss: 0.017745740711688995 2023-01-23 22:45:16.708390: step: 332/77, loss: 0.012438332661986351 2023-01-23 22:45:18.022917: step: 336/77, loss: 0.02190583571791649 2023-01-23 22:45:19.308963: step: 340/77, loss: 0.014679135754704475 2023-01-23 22:45:20.598006: step: 344/77, loss: 0.007208996452391148 2023-01-23 22:45:21.882158: step: 348/77, loss: 0.005275039467960596 2023-01-23 22:45:23.168077: step: 352/77, loss: 0.01978233829140663 2023-01-23 22:45:24.499822: step: 356/77, loss: 0.03178076446056366 2023-01-23 22:45:25.759670: step: 360/77, loss: 0.04151931032538414 2023-01-23 22:45:27.059119: step: 364/77, loss: 0.004386279731988907 2023-01-23 22:45:28.404967: step: 368/77, loss: 0.046472564339637756 2023-01-23 22:45:29.719818: step: 372/77, loss: 0.06333746761083603 2023-01-23 22:45:31.022249: step: 376/77, loss: 0.026951458305120468 2023-01-23 22:45:32.378843: step: 380/77, loss: 0.017069118097424507 2023-01-23 22:45:33.705096: step: 384/77, loss: 0.025377962738275528 2023-01-23 22:45:35.010616: step: 388/77, loss: 0.005843974184244871 ================================================== Loss: 0.034 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Chinese: {'template': {'p': 0.9180327868852459, 'r': 0.4444444444444444, 'f1': 0.5989304812834225}, 'slot': {'p': 0.5517241379310345, 'r': 0.013745704467353952, 'f1': 0.02682313495389774}, 'combined': 0.016065193127468166, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Korean: {'template': {'p': 0.9166666666666666, 'r': 0.4365079365079365, 'f1': 0.5913978494623655}, 'slot': {'p': 0.5517241379310345, 'r': 0.013745704467353952, 'f1': 0.02682313495389774}, 'combined': 0.01586314432757393, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Russian: {'template': {'p': 0.9180327868852459, 'r': 0.4444444444444444, 'f1': 0.5989304812834225}, 'slot': {'p': 0.5517241379310345, 'r': 0.013745704467353952, 'f1': 0.02682313495389774}, 'combined': 0.016065193127468166, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 
'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 5 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:47:16.647294: step: 4/77, loss: 0.009712163358926773 2023-01-23 22:47:17.927024: step: 8/77, loss: 0.07672275602817535 2023-01-23 22:47:19.267688: step: 12/77, loss: 0.00230649346485734 2023-01-23 22:47:20.560772: step: 16/77, loss: 0.019387666136026382 2023-01-23 22:47:21.885853: step: 20/77, loss: 0.015886085107922554 2023-01-23 22:47:23.212984: step: 24/77, loss: 0.05474954470992088 2023-01-23 22:47:24.530790: step: 28/77, loss: 0.04290302097797394 2023-01-23 22:47:25.833898: step: 32/77, loss: 0.06911797821521759 2023-01-23 22:47:27.131272: step: 36/77, loss: 0.008430874906480312 2023-01-23 22:47:28.415651: step: 40/77, loss: 0.011744523420929909 2023-01-23 22:47:29.725974: step: 44/77, loss: 0.06511920690536499 2023-01-23 22:47:31.076083: step: 48/77, loss: 0.03562755882740021 2023-01-23 22:47:32.422521: step: 52/77, loss: 0.020151851698756218 2023-01-23 22:47:33.700732: step: 56/77, loss: 0.0567491240799427 2023-01-23 22:47:34.961322: step: 60/77, loss: 0.08791999518871307 2023-01-23 22:47:36.233495: step: 64/77, loss: 0.03458832576870918 
2023-01-23 22:47:37.570737: step: 68/77, loss: 0.0075601390562951565 2023-01-23 22:47:38.874287: step: 72/77, loss: 0.006983015686273575 2023-01-23 22:47:40.171027: step: 76/77, loss: 0.008428785018622875 2023-01-23 22:47:41.453034: step: 80/77, loss: 0.017788860946893692 2023-01-23 22:47:42.723212: step: 84/77, loss: 0.028414368629455566 2023-01-23 22:47:44.079686: step: 88/77, loss: 0.04921044781804085 2023-01-23 22:47:45.412866: step: 92/77, loss: 0.03103695623576641 2023-01-23 22:47:46.749375: step: 96/77, loss: 0.037225544452667236 2023-01-23 22:47:48.024652: step: 100/77, loss: 0.01973811909556389 2023-01-23 22:47:49.323452: step: 104/77, loss: 0.020425807684659958 2023-01-23 22:47:50.633355: step: 108/77, loss: 0.029876621440052986 2023-01-23 22:47:51.998066: step: 112/77, loss: 0.05362161621451378 2023-01-23 22:47:53.274354: step: 116/77, loss: 0.05245602875947952 2023-01-23 22:47:54.561571: step: 120/77, loss: 0.015833435580134392 2023-01-23 22:47:55.826482: step: 124/77, loss: 0.012610914185643196 2023-01-23 22:47:57.112956: step: 128/77, loss: 0.003384954761713743 2023-01-23 22:47:58.461876: step: 132/77, loss: 0.026845553889870644 2023-01-23 22:47:59.783847: step: 136/77, loss: 0.1506502628326416 2023-01-23 22:48:01.086335: step: 140/77, loss: 0.025692788884043694 2023-01-23 22:48:02.435655: step: 144/77, loss: 0.025525707751512527 2023-01-23 22:48:03.699433: step: 148/77, loss: 0.009272797964513302 2023-01-23 22:48:04.998801: step: 152/77, loss: 0.011932496912777424 2023-01-23 22:48:06.333577: step: 156/77, loss: 0.010740198194980621 2023-01-23 22:48:07.596138: step: 160/77, loss: 0.012506979517638683 2023-01-23 22:48:08.921329: step: 164/77, loss: 0.03089609183371067 2023-01-23 22:48:10.258694: step: 168/77, loss: 0.03214671462774277 2023-01-23 22:48:11.611573: step: 172/77, loss: 0.01356554962694645 2023-01-23 22:48:12.873190: step: 176/77, loss: 0.005248316563665867 2023-01-23 22:48:14.208647: step: 180/77, loss: 0.017001446336507797 2023-01-23 22:48:15.562735: step: 184/77, loss: 0.016217608004808426 2023-01-23 22:48:16.923499: step: 188/77, loss: 0.0104384645819664 2023-01-23 22:48:18.195447: step: 192/77, loss: 0.002319543156772852 2023-01-23 22:48:19.505143: step: 196/77, loss: 0.02026873268187046 2023-01-23 22:48:20.806916: step: 200/77, loss: 0.014656349085271358 2023-01-23 22:48:22.100780: step: 204/77, loss: 0.026904229074716568 2023-01-23 22:48:23.419871: step: 208/77, loss: 0.02369135618209839 2023-01-23 22:48:24.687496: step: 212/77, loss: 0.030837981030344963 2023-01-23 22:48:25.978916: step: 216/77, loss: 0.021040054038167 2023-01-23 22:48:27.339931: step: 220/77, loss: 0.04531940072774887 2023-01-23 22:48:28.681949: step: 224/77, loss: 0.030913038179278374 2023-01-23 22:48:30.006987: step: 228/77, loss: 0.0013430267572402954 2023-01-23 22:48:31.284601: step: 232/77, loss: 0.006149583961814642 2023-01-23 22:48:32.608444: step: 236/77, loss: 0.12254571914672852 2023-01-23 22:48:33.935235: step: 240/77, loss: 0.1943351775407791 2023-01-23 22:48:35.221731: step: 244/77, loss: 0.023108499124646187 2023-01-23 22:48:36.492407: step: 248/77, loss: 0.007678337395191193 2023-01-23 22:48:37.753560: step: 252/77, loss: 0.00511655118316412 2023-01-23 22:48:39.069804: step: 256/77, loss: 0.08207176625728607 2023-01-23 22:48:40.389922: step: 260/77, loss: 0.1483311504125595 2023-01-23 22:48:41.672239: step: 264/77, loss: 0.005785231478512287 2023-01-23 22:48:43.001081: step: 268/77, loss: 0.014274337328970432 2023-01-23 22:48:44.303298: step: 272/77, loss: 
0.057694658637046814 2023-01-23 22:48:45.651174: step: 276/77, loss: 0.008485383354127407 2023-01-23 22:48:46.968885: step: 280/77, loss: 0.004589818883687258 2023-01-23 22:48:48.267101: step: 284/77, loss: 0.04261079430580139 2023-01-23 22:48:49.549009: step: 288/77, loss: 0.030632158741354942 2023-01-23 22:48:50.823469: step: 292/77, loss: 0.003071536310017109 2023-01-23 22:48:52.165365: step: 296/77, loss: 0.06164884567260742 2023-01-23 22:48:53.462375: step: 300/77, loss: 0.07565826177597046 2023-01-23 22:48:54.769458: step: 304/77, loss: 0.02637699618935585 2023-01-23 22:48:56.092535: step: 308/77, loss: 0.053700368851423264 2023-01-23 22:48:57.365590: step: 312/77, loss: 0.032959625124931335 2023-01-23 22:48:58.633532: step: 316/77, loss: 0.05380704253911972 2023-01-23 22:49:00.041284: step: 320/77, loss: 0.01774766482412815 2023-01-23 22:49:01.346077: step: 324/77, loss: 0.04747108370065689 2023-01-23 22:49:02.641375: step: 328/77, loss: 0.02787405252456665 2023-01-23 22:49:03.987826: step: 332/77, loss: 0.0015647481195628643 2023-01-23 22:49:05.327845: step: 336/77, loss: 0.02628343552350998 2023-01-23 22:49:06.645720: step: 340/77, loss: 0.0313742458820343 2023-01-23 22:49:07.958348: step: 344/77, loss: 0.00651584193110466 2023-01-23 22:49:09.303811: step: 348/77, loss: 0.006083859130740166 2023-01-23 22:49:10.624651: step: 352/77, loss: 0.028530307114124298 2023-01-23 22:49:11.932170: step: 356/77, loss: 0.017432240769267082 2023-01-23 22:49:13.234101: step: 360/77, loss: 0.028564533218741417 2023-01-23 22:49:14.589143: step: 364/77, loss: 0.008658488281071186 2023-01-23 22:49:15.898611: step: 368/77, loss: 0.0064378841780126095 2023-01-23 22:49:17.197054: step: 372/77, loss: 0.08271316438913345 2023-01-23 22:49:18.529177: step: 376/77, loss: 0.037459179759025574 2023-01-23 22:49:19.907447: step: 380/77, loss: 0.002762680407613516 2023-01-23 22:49:21.229692: step: 384/77, loss: 0.038842104375362396 2023-01-23 22:49:22.582478: step: 388/77, loss: 0.02973165735602379 ================================================== Loss: 0.032 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 5} Test Chinese: {'template': {'p': 0.971830985915493, 'r': 0.5476190476190477, 'f1': 0.7005076142131981}, 'slot': {'p': 0.7027027027027027, 'r': 0.022336769759450172, 'f1': 0.04329725228975853}, 'combined': 0.030330054903485677, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 5} Test Korean: {'template': {'p': 0.9714285714285714, 'r': 0.5396825396825397, 'f1': 0.6938775510204082}, 'slot': {'p': 0.6944444444444444, 'r': 0.02147766323024055, 'f1': 0.041666666666666664}, 'combined': 0.028911564625850338, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 5} Test Russian: {'template': {'p': 0.9710144927536232, 'r': 0.5317460317460317, 'f1': 0.6871794871794872}, 'slot': {'p': 0.7058823529411765, 'r': 0.020618556701030927, 'f1': 0.04006677796327212}, 'combined': 0.02753306793373571, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 
0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 6 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:51:04.419870: step: 4/77, loss: 0.03020402044057846 2023-01-23 22:51:05.718922: step: 8/77, loss: 0.06450265645980835 2023-01-23 22:51:07.040360: step: 12/77, loss: 0.022785644978284836 2023-01-23 22:51:08.381179: step: 16/77, loss: 0.035041648894548416 2023-01-23 22:51:09.697960: step: 20/77, loss: 0.00880738440901041 2023-01-23 22:51:10.960185: step: 24/77, loss: 0.0014209969667717814 2023-01-23 22:51:12.203687: step: 28/77, loss: 0.014696823433041573 2023-01-23 22:51:13.499063: step: 32/77, loss: 0.06408432126045227 2023-01-23 22:51:14.845697: step: 36/77, loss: 0.023726556450128555 2023-01-23 22:51:16.141856: step: 40/77, loss: 0.01294927392154932 2023-01-23 22:51:17.447971: step: 44/77, loss: 0.02006208896636963 2023-01-23 22:51:18.754271: step: 48/77, loss: 0.007329146843403578 2023-01-23 22:51:20.085957: step: 52/77, loss: 0.01106266863644123 2023-01-23 22:51:21.402812: step: 56/77, 
loss: 0.0002934904769062996 2023-01-23 22:51:22.683076: step: 60/77, loss: 0.0036544944159686565 2023-01-23 22:51:24.036665: step: 64/77, loss: 0.011319036595523357 2023-01-23 22:51:25.344974: step: 68/77, loss: 0.05203656852245331 2023-01-23 22:51:26.560315: step: 72/77, loss: 0.04454466328024864 2023-01-23 22:51:27.902508: step: 76/77, loss: 0.05974990129470825 2023-01-23 22:51:29.224763: step: 80/77, loss: 0.05492483824491501 2023-01-23 22:51:30.524307: step: 84/77, loss: 0.0255147572606802 2023-01-23 22:51:31.849985: step: 88/77, loss: 0.05782134830951691 2023-01-23 22:51:33.170642: step: 92/77, loss: 0.03452653810381889 2023-01-23 22:51:34.432294: step: 96/77, loss: 0.0071827988140285015 2023-01-23 22:51:35.769427: step: 100/77, loss: 0.039882779121398926 2023-01-23 22:51:37.106409: step: 104/77, loss: 0.028421707451343536 2023-01-23 22:51:38.423176: step: 108/77, loss: 0.010081905871629715 2023-01-23 22:51:39.761629: step: 112/77, loss: 0.00359415914863348 2023-01-23 22:51:41.033682: step: 116/77, loss: 0.005187658593058586 2023-01-23 22:51:42.331057: step: 120/77, loss: 0.04282506927847862 2023-01-23 22:51:43.660534: step: 124/77, loss: 0.044339973479509354 2023-01-23 22:51:44.958297: step: 128/77, loss: 0.0011450829915702343 2023-01-23 22:51:46.289702: step: 132/77, loss: 0.016200868412852287 2023-01-23 22:51:47.543581: step: 136/77, loss: 0.040591076016426086 2023-01-23 22:51:48.832318: step: 140/77, loss: 0.0006642768858000636 2023-01-23 22:51:50.114820: step: 144/77, loss: 0.028512677177786827 2023-01-23 22:51:51.442278: step: 148/77, loss: 0.04284897446632385 2023-01-23 22:51:52.730073: step: 152/77, loss: 0.006877818610519171 2023-01-23 22:51:54.024829: step: 156/77, loss: 0.015897460281848907 2023-01-23 22:51:55.351341: step: 160/77, loss: 0.01649112068116665 2023-01-23 22:51:56.671225: step: 164/77, loss: 0.00428872462362051 2023-01-23 22:51:57.958978: step: 168/77, loss: 0.00872961524873972 2023-01-23 22:51:59.341202: step: 172/77, loss: 0.42743775248527527 2023-01-23 22:52:00.660762: step: 176/77, loss: 0.02830498293042183 2023-01-23 22:52:01.977831: step: 180/77, loss: 0.014903073199093342 2023-01-23 22:52:03.355133: step: 184/77, loss: 0.00024340944946743548 2023-01-23 22:52:04.698178: step: 188/77, loss: 0.010939395055174828 2023-01-23 22:52:06.006630: step: 192/77, loss: 0.08201880753040314 2023-01-23 22:52:07.307749: step: 196/77, loss: 0.016874713823199272 2023-01-23 22:52:08.573153: step: 200/77, loss: 0.014531994238495827 2023-01-23 22:52:09.858387: step: 204/77, loss: 0.0049271308816969395 2023-01-23 22:52:11.174136: step: 208/77, loss: 0.04061633720993996 2023-01-23 22:52:12.445885: step: 212/77, loss: 0.002478919690474868 2023-01-23 22:52:13.773032: step: 216/77, loss: 0.04487251490354538 2023-01-23 22:52:15.098040: step: 220/77, loss: 0.022969551384449005 2023-01-23 22:52:16.438180: step: 224/77, loss: 0.01762833259999752 2023-01-23 22:52:17.742742: step: 228/77, loss: 0.023906050249934196 2023-01-23 22:52:19.001203: step: 232/77, loss: 0.006751799024641514 2023-01-23 22:52:20.290999: step: 236/77, loss: 0.02012111060321331 2023-01-23 22:52:21.607801: step: 240/77, loss: 0.0218367762863636 2023-01-23 22:52:22.920881: step: 244/77, loss: 0.002203400479629636 2023-01-23 22:52:24.206217: step: 248/77, loss: 0.03326624631881714 2023-01-23 22:52:25.493069: step: 252/77, loss: 0.018131040036678314 2023-01-23 22:52:26.816107: step: 256/77, loss: 0.017648430541157722 2023-01-23 22:52:28.120407: step: 260/77, loss: 0.049811914563179016 2023-01-23 22:52:29.409105: step: 
264/77, loss: 0.013499320484697819 2023-01-23 22:52:30.717189: step: 268/77, loss: 0.00505072483792901 2023-01-23 22:52:32.020210: step: 272/77, loss: 0.0012601492926478386 2023-01-23 22:52:33.304641: step: 276/77, loss: 0.011764097958803177 2023-01-23 22:52:34.590911: step: 280/77, loss: 0.008309504017233849 2023-01-23 22:52:35.915472: step: 284/77, loss: 0.0017276185099035501 2023-01-23 22:52:37.209845: step: 288/77, loss: 0.004906029440462589 2023-01-23 22:52:38.514143: step: 292/77, loss: 0.012138359248638153 2023-01-23 22:52:39.829037: step: 296/77, loss: 0.03456299006938934 2023-01-23 22:52:41.153323: step: 300/77, loss: 0.0358954556286335 2023-01-23 22:52:42.456730: step: 304/77, loss: 0.0026492213364690542 2023-01-23 22:52:43.752349: step: 308/77, loss: 0.010927144438028336 2023-01-23 22:52:45.098492: step: 312/77, loss: 0.17556488513946533 2023-01-23 22:52:46.364941: step: 316/77, loss: 0.02160274237394333 2023-01-23 22:52:47.718228: step: 320/77, loss: 0.0010978600475937128 2023-01-23 22:52:48.979484: step: 324/77, loss: 0.03171160817146301 2023-01-23 22:52:50.254191: step: 328/77, loss: 0.017102569341659546 2023-01-23 22:52:51.517877: step: 332/77, loss: 0.00043492187978699803 2023-01-23 22:52:52.828182: step: 336/77, loss: 0.014497132040560246 2023-01-23 22:52:54.146053: step: 340/77, loss: 0.03126572445034981 2023-01-23 22:52:55.449864: step: 344/77, loss: 0.08123748749494553 2023-01-23 22:52:56.755793: step: 348/77, loss: 0.014668785035610199 2023-01-23 22:52:58.093565: step: 352/77, loss: 0.08094578236341476 2023-01-23 22:52:59.351670: step: 356/77, loss: 0.00300383847206831 2023-01-23 22:53:00.627087: step: 360/77, loss: 0.0017249882221221924 2023-01-23 22:53:01.911794: step: 364/77, loss: 0.00893208384513855 2023-01-23 22:53:03.226107: step: 368/77, loss: 0.008719152770936489 2023-01-23 22:53:04.543743: step: 372/77, loss: 0.05079510062932968 2023-01-23 22:53:05.901735: step: 376/77, loss: 0.010510473512113094 2023-01-23 22:53:07.172124: step: 380/77, loss: 0.01431712880730629 2023-01-23 22:53:08.482056: step: 384/77, loss: 0.005489309784024954 2023-01-23 22:53:09.818841: step: 388/77, loss: 0.0007201767875812948 ================================================== Loss: 0.027 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 6} Test Chinese: {'template': {'p': 0.9558823529411765, 'r': 0.5158730158730159, 'f1': 0.6701030927835052}, 'slot': {'p': 0.5555555555555556, 'r': 0.01288659793814433, 'f1': 0.02518891687657431}, 'combined': 0.016879171102859074, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 6} Test Korean: {'template': {'p': 0.9558823529411765, 'r': 0.5158730158730159, 'f1': 0.6701030927835052}, 'slot': {'p': 0.5555555555555556, 'r': 0.01288659793814433, 'f1': 0.02518891687657431}, 'combined': 0.016879171102859074, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 6} Test Russian: {'template': {'p': 0.9558823529411765, 'r': 0.5158730158730159, 'f1': 0.6701030927835052}, 'slot': {'p': 0.5555555555555556, 'r': 0.01288659793814433, 'f1': 0.02518891687657431}, 'combined': 
0.016879171102859074, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 6} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 7 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:54:51.377109: step: 4/77, loss: 0.039592523127794266 2023-01-23 22:54:52.714589: step: 8/77, loss: 0.03647180274128914 2023-01-23 22:54:53.979102: step: 12/77, loss: 0.05950095131993294 2023-01-23 22:54:55.262603: step: 16/77, loss: 0.020810147747397423 2023-01-23 22:54:56.537767: step: 20/77, loss: 0.049334120005369186 2023-01-23 22:54:57.850494: step: 24/77, loss: 0.011051503010094166 2023-01-23 22:54:59.174969: step: 28/77, loss: 0.13336032629013062 2023-01-23 22:55:00.508129: step: 32/77, loss: 0.03745570033788681 2023-01-23 22:55:01.843345: step: 36/77, loss: 0.009318767115473747 2023-01-23 22:55:03.154122: step: 40/77, loss: 0.020486967638134956 2023-01-23 22:55:04.432115: step: 44/77, loss: 0.04494426026940346 2023-01-23 22:55:05.780525: step: 48/77, 
loss: 0.006008772645145655 2023-01-23 22:55:07.078675: step: 52/77, loss: 0.003612846601754427 2023-01-23 22:55:08.460883: step: 56/77, loss: 0.01742379181087017 2023-01-23 22:55:09.787192: step: 60/77, loss: 0.006468200124800205 2023-01-23 22:55:11.087787: step: 64/77, loss: 0.004890457261353731 2023-01-23 22:55:12.370359: step: 68/77, loss: 0.0017555034719407558 2023-01-23 22:55:13.632300: step: 72/77, loss: 0.10227682441473007 2023-01-23 22:55:14.901765: step: 76/77, loss: 0.01095428504049778 2023-01-23 22:55:16.189347: step: 80/77, loss: 0.03547348454594612 2023-01-23 22:55:17.468097: step: 84/77, loss: 0.014274870045483112 2023-01-23 22:55:18.813452: step: 88/77, loss: 0.019179657101631165 2023-01-23 22:55:20.157705: step: 92/77, loss: 0.001970258541405201 2023-01-23 22:55:21.439100: step: 96/77, loss: 0.004264642484486103 2023-01-23 22:55:22.789053: step: 100/77, loss: 0.007505690213292837 2023-01-23 22:55:24.080947: step: 104/77, loss: 0.009430565871298313 2023-01-23 22:55:25.336301: step: 108/77, loss: 0.00442750146612525 2023-01-23 22:55:26.688630: step: 112/77, loss: 0.0187971368432045 2023-01-23 22:55:28.083988: step: 116/77, loss: 0.0029458203352987766 2023-01-23 22:55:29.376663: step: 120/77, loss: 0.0011755856685340405 2023-01-23 22:55:30.674250: step: 124/77, loss: 0.05318979546427727 2023-01-23 22:55:31.972589: step: 128/77, loss: 0.07241583615541458 2023-01-23 22:55:33.321667: step: 132/77, loss: 0.03433309495449066 2023-01-23 22:55:34.640400: step: 136/77, loss: 0.006371957249939442 2023-01-23 22:55:35.940351: step: 140/77, loss: 0.00034351838985458016 2023-01-23 22:55:37.288130: step: 144/77, loss: 0.02569696307182312 2023-01-23 22:55:38.581508: step: 148/77, loss: 0.012018335051834583 2023-01-23 22:55:39.892924: step: 152/77, loss: 0.0008478729287162423 2023-01-23 22:55:41.176536: step: 156/77, loss: 0.00034335945383645594 2023-01-23 22:55:42.489741: step: 160/77, loss: 0.0005546339671127498 2023-01-23 22:55:43.805431: step: 164/77, loss: 0.007714861538261175 2023-01-23 22:55:45.204264: step: 168/77, loss: 0.024812553077936172 2023-01-23 22:55:46.521863: step: 172/77, loss: 0.032302480190992355 2023-01-23 22:55:47.836233: step: 176/77, loss: 0.054514043033123016 2023-01-23 22:55:49.157991: step: 180/77, loss: 0.0036903393920511007 2023-01-23 22:55:50.483383: step: 184/77, loss: 0.03750649094581604 2023-01-23 22:55:51.780751: step: 188/77, loss: 0.0182705819606781 2023-01-23 22:55:53.092299: step: 192/77, loss: 0.03227635845541954 2023-01-23 22:55:54.388446: step: 196/77, loss: 0.012850667349994183 2023-01-23 22:55:55.639245: step: 200/77, loss: 0.02886662445962429 2023-01-23 22:55:56.928398: step: 204/77, loss: 0.011203744448721409 2023-01-23 22:55:58.240733: step: 208/77, loss: 0.004409831017255783 2023-01-23 22:55:59.558691: step: 212/77, loss: 0.04057746380567551 2023-01-23 22:56:00.886430: step: 216/77, loss: 0.01268512848764658 2023-01-23 22:56:02.161310: step: 220/77, loss: 0.002502129413187504 2023-01-23 22:56:03.466934: step: 224/77, loss: 0.03674924001097679 2023-01-23 22:56:04.711149: step: 228/77, loss: 0.05796860530972481 2023-01-23 22:56:06.007187: step: 232/77, loss: 0.05133785307407379 2023-01-23 22:56:07.311515: step: 236/77, loss: 0.006198606453835964 2023-01-23 22:56:08.617110: step: 240/77, loss: 0.10119032859802246 2023-01-23 22:56:09.927007: step: 244/77, loss: 0.02887742966413498 2023-01-23 22:56:11.223121: step: 248/77, loss: 0.024643737822771072 2023-01-23 22:56:12.507422: step: 252/77, loss: 0.057729966938495636 2023-01-23 22:56:13.836484: step: 
256/77, loss: 0.029447536915540695 2023-01-23 22:56:15.170321: step: 260/77, loss: 0.06826838105916977 2023-01-23 22:56:16.507503: step: 264/77, loss: 0.005282876547425985 2023-01-23 22:56:17.830590: step: 268/77, loss: 0.03360510990023613 2023-01-23 22:56:19.136132: step: 272/77, loss: 0.0028833940159529448 2023-01-23 22:56:20.428899: step: 276/77, loss: 0.0013751761289313436 2023-01-23 22:56:21.730330: step: 280/77, loss: 0.023520752787590027 2023-01-23 22:56:23.039965: step: 284/77, loss: 0.003494781441986561 2023-01-23 22:56:24.368115: step: 288/77, loss: 0.01919829472899437 2023-01-23 22:56:25.677036: step: 292/77, loss: 0.017101947218179703 2023-01-23 22:56:26.978946: step: 296/77, loss: 0.04143914580345154 2023-01-23 22:56:28.273620: step: 300/77, loss: 0.005428193137049675 2023-01-23 22:56:29.585426: step: 304/77, loss: 0.012205487117171288 2023-01-23 22:56:30.889808: step: 308/77, loss: 0.01458023302257061 2023-01-23 22:56:32.239113: step: 312/77, loss: 0.03651302307844162 2023-01-23 22:56:33.566266: step: 316/77, loss: 0.006091257557272911 2023-01-23 22:56:34.922784: step: 320/77, loss: 0.006200199481099844 2023-01-23 22:56:36.243279: step: 324/77, loss: 0.03252246230840683 2023-01-23 22:56:37.588174: step: 328/77, loss: 0.017601091414690018 2023-01-23 22:56:38.921786: step: 332/77, loss: 0.002433488378301263 2023-01-23 22:56:40.238501: step: 336/77, loss: 0.06904677301645279 2023-01-23 22:56:41.544430: step: 340/77, loss: 0.006120236124843359 2023-01-23 22:56:42.864571: step: 344/77, loss: 0.007540901191532612 2023-01-23 22:56:44.176256: step: 348/77, loss: 0.009963840246200562 2023-01-23 22:56:45.474109: step: 352/77, loss: 0.003343338379636407 2023-01-23 22:56:46.752599: step: 356/77, loss: 0.034836817532777786 2023-01-23 22:56:48.102892: step: 360/77, loss: 0.01343780942261219 2023-01-23 22:56:49.417840: step: 364/77, loss: 0.00487480266019702 2023-01-23 22:56:50.688604: step: 368/77, loss: 0.025294139981269836 2023-01-23 22:56:51.973200: step: 372/77, loss: 0.06717772036790848 2023-01-23 22:56:53.328916: step: 376/77, loss: 0.01642705500125885 2023-01-23 22:56:54.630317: step: 380/77, loss: 0.010443812236189842 2023-01-23 22:56:55.945906: step: 384/77, loss: 0.00295252725481987 2023-01-23 22:56:57.255369: step: 388/77, loss: 0.027328480035066605 ================================================== Loss: 0.024 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Chinese: {'template': {'p': 0.9565217391304348, 'r': 0.5238095238095238, 'f1': 0.6769230769230768}, 'slot': {'p': 0.5714285714285714, 'r': 0.010309278350515464, 'f1': 0.020253164556962026}, 'combined': 0.01370983446932814, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Korean: {'template': {'p': 0.9558823529411765, 'r': 0.5158730158730159, 'f1': 0.6701030927835052}, 'slot': {'p': 0.5714285714285714, 'r': 0.010309278350515464, 'f1': 0.020253164556962026}, 'combined': 0.013571708208273523, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 7} Test Russian: {'template': {'p': 0.9558823529411765, 'r': 
0.5158730158730159, 'f1': 0.6701030927835052}, 'slot': {'p': 0.5714285714285714, 'r': 0.010309278350515464, 'f1': 0.020253164556962026}, 'combined': 0.013571708208273523, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 8 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 22:58:39.182700: step: 4/77, loss: 0.002951761707663536 2023-01-23 22:58:40.496865: step: 8/77, loss: 0.011420158669352531 2023-01-23 22:58:41.837774: step: 12/77, loss: 0.030093315988779068 2023-01-23 22:58:43.092341: step: 16/77, loss: 0.010848336853086948 2023-01-23 22:58:44.394104: step: 20/77, loss: 0.028082571923732758 2023-01-23 22:58:45.728401: step: 24/77, loss: 0.006531550548970699 2023-01-23 22:58:47.041505: step: 28/77, loss: 0.017639540135860443 2023-01-23 22:58:48.422630: step: 32/77, loss: 0.002651860937476158 2023-01-23 22:58:49.687330: step: 36/77, loss: 0.010134564712643623 2023-01-23 22:58:51.019218: 
step: 40/77, loss: 0.03160660341382027 2023-01-23 22:58:52.370666: step: 44/77, loss: 0.005230441689491272 2023-01-23 22:58:53.655801: step: 48/77, loss: 0.030870024114847183 2023-01-23 22:58:54.968711: step: 52/77, loss: 0.02877563051879406 2023-01-23 22:58:56.328122: step: 56/77, loss: 0.0063386764377355576 2023-01-23 22:58:57.641519: step: 60/77, loss: 0.03188091516494751 2023-01-23 22:58:58.925152: step: 64/77, loss: 0.010966446250677109 2023-01-23 22:59:00.259812: step: 68/77, loss: 0.002127768937498331 2023-01-23 22:59:01.565419: step: 72/77, loss: 0.0037651783786714077 2023-01-23 22:59:02.854566: step: 76/77, loss: 0.016201186925172806 2023-01-23 22:59:04.152944: step: 80/77, loss: 9.222197695635259e-05 2023-01-23 22:59:05.425637: step: 84/77, loss: 0.024649446830153465 2023-01-23 22:59:06.688955: step: 88/77, loss: 0.0013431230327114463 2023-01-23 22:59:08.014474: step: 92/77, loss: 0.011011897586286068 2023-01-23 22:59:09.329157: step: 96/77, loss: 0.0007366195786744356 2023-01-23 22:59:10.582170: step: 100/77, loss: 0.02774541825056076 2023-01-23 22:59:11.926375: step: 104/77, loss: 0.022737201303243637 2023-01-23 22:59:13.188066: step: 108/77, loss: 0.021142397075891495 2023-01-23 22:59:14.506132: step: 112/77, loss: 0.0162353478372097 2023-01-23 22:59:15.803424: step: 116/77, loss: 0.01265695784240961 2023-01-23 22:59:17.137887: step: 120/77, loss: 0.011829703114926815 2023-01-23 22:59:18.406621: step: 124/77, loss: 0.016484301537275314 2023-01-23 22:59:19.694043: step: 128/77, loss: 0.0007581686368212104 2023-01-23 22:59:21.031104: step: 132/77, loss: 0.0033396773505955935 2023-01-23 22:59:22.350139: step: 136/77, loss: 0.07784155011177063 2023-01-23 22:59:23.659768: step: 140/77, loss: 0.0069819167256355286 2023-01-23 22:59:24.925781: step: 144/77, loss: 0.12047009915113449 2023-01-23 22:59:26.209935: step: 148/77, loss: 0.003101084381341934 2023-01-23 22:59:27.527241: step: 152/77, loss: 0.005879676900804043 2023-01-23 22:59:28.845110: step: 156/77, loss: 0.012600190006196499 2023-01-23 22:59:30.167433: step: 160/77, loss: 0.034457940608263016 2023-01-23 22:59:31.492699: step: 164/77, loss: 0.001940029440447688 2023-01-23 22:59:32.803894: step: 168/77, loss: 0.018188534304499626 2023-01-23 22:59:34.104564: step: 172/77, loss: 0.019673490896821022 2023-01-23 22:59:35.375155: step: 176/77, loss: 0.004080579150468111 2023-01-23 22:59:36.658938: step: 180/77, loss: 0.013569517992436886 2023-01-23 22:59:37.937785: step: 184/77, loss: 0.0006934780394658446 2023-01-23 22:59:39.236254: step: 188/77, loss: 0.001094447448849678 2023-01-23 22:59:40.598228: step: 192/77, loss: 0.016190217807888985 2023-01-23 22:59:41.970465: step: 196/77, loss: 0.021863294765353203 2023-01-23 22:59:43.285862: step: 200/77, loss: 0.01695844903588295 2023-01-23 22:59:44.576988: step: 204/77, loss: 0.038087327033281326 2023-01-23 22:59:45.911583: step: 208/77, loss: 0.02763482555747032 2023-01-23 22:59:47.226578: step: 212/77, loss: 0.02110038697719574 2023-01-23 22:59:48.538928: step: 216/77, loss: 0.038932498544454575 2023-01-23 22:59:49.841364: step: 220/77, loss: 0.026587240397930145 2023-01-23 22:59:51.108480: step: 224/77, loss: 0.00013890476839151233 2023-01-23 22:59:52.397744: step: 228/77, loss: 0.10731155425310135 2023-01-23 22:59:53.665269: step: 232/77, loss: 0.008701834827661514 2023-01-23 22:59:54.944946: step: 236/77, loss: 0.0001420244516339153 2023-01-23 22:59:56.264826: step: 240/77, loss: 0.0021140193566679955 2023-01-23 22:59:57.596290: step: 244/77, loss: 0.00019444481586106122 
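Note: the command logged at the top of each epoch passes --batch_size 10 and --accumulate_step 4, and the printed step counter advances by 4 per entry, which is consistent with logging once per accumulation cycle (an effective batch of 10 x 4 = 40 examples per optimizer update). The loop below is only a minimal, self-contained PyTorch sketch of that accumulation pattern; the dummy model, data, and variable names are placeholders, not the actual train.py code.

import torch

# Minimal gradient-accumulation sketch (dummy model/data, not the real training loop).
model = torch.nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=2e-4)
batches = [(torch.randn(10, 4), torch.randn(10, 1)) for _ in range(8)]  # batch_size 10

accumulate_step = 4
optimizer.zero_grad()
for i, (x, y) in enumerate(batches, start=1):
    loss = torch.nn.functional.mse_loss(model(x), y) / accumulate_step  # average over the cycle
    loss.backward()
    if i % accumulate_step == 0:   # one optimizer update every 4 mini-batches
        optimizer.step()
        optimizer.zero_grad()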
2023-01-23 22:59:58.914038: step: 248/77, loss: 0.03828136622905731 2023-01-23 23:00:00.249678: step: 252/77, loss: 0.11219515651464462 2023-01-23 23:00:01.620574: step: 256/77, loss: 0.007098965346813202 2023-01-23 23:00:02.931710: step: 260/77, loss: 0.01880680024623871 2023-01-23 23:00:04.213285: step: 264/77, loss: 0.11024859547615051 2023-01-23 23:00:05.519972: step: 268/77, loss: 0.004602618515491486 2023-01-23 23:00:06.776349: step: 272/77, loss: 0.005424637347459793 2023-01-23 23:00:08.072120: step: 276/77, loss: 0.016098979860544205 2023-01-23 23:00:09.375764: step: 280/77, loss: 0.007525038905441761 2023-01-23 23:00:10.716140: step: 284/77, loss: 0.01623627543449402 2023-01-23 23:00:12.009799: step: 288/77, loss: 0.020228806883096695 2023-01-23 23:00:13.295097: step: 292/77, loss: 0.00013010915427003056 2023-01-23 23:00:14.609889: step: 296/77, loss: 0.001430353382602334 2023-01-23 23:00:15.904474: step: 300/77, loss: 0.004590929951518774 2023-01-23 23:00:17.216422: step: 304/77, loss: 0.007614678703248501 2023-01-23 23:00:18.550292: step: 308/77, loss: 0.028848322108387947 2023-01-23 23:00:19.820051: step: 312/77, loss: 0.012136640027165413 2023-01-23 23:00:21.078725: step: 316/77, loss: 0.0143202506005764 2023-01-23 23:00:22.345270: step: 320/77, loss: 0.0036574748810380697 2023-01-23 23:00:23.662752: step: 324/77, loss: 0.014181990176439285 2023-01-23 23:00:24.931998: step: 328/77, loss: 0.047021135687828064 2023-01-23 23:00:26.231369: step: 332/77, loss: 0.0044871168211102486 2023-01-23 23:00:27.496872: step: 336/77, loss: 0.012777280993759632 2023-01-23 23:00:28.829220: step: 340/77, loss: 0.011580890975892544 2023-01-23 23:00:30.159454: step: 344/77, loss: 0.01588420197367668 2023-01-23 23:00:31.457120: step: 348/77, loss: 0.09475565701723099 2023-01-23 23:00:32.756621: step: 352/77, loss: 0.02808484062552452 2023-01-23 23:00:34.048221: step: 356/77, loss: 0.006215309724211693 2023-01-23 23:00:35.361858: step: 360/77, loss: 0.005064602941274643 2023-01-23 23:00:36.699411: step: 364/77, loss: 0.015820614993572235 2023-01-23 23:00:37.974503: step: 368/77, loss: 0.02967149205505848 2023-01-23 23:00:39.307771: step: 372/77, loss: 0.008606133982539177 2023-01-23 23:00:40.601364: step: 376/77, loss: 0.05700630322098732 2023-01-23 23:00:41.898794: step: 380/77, loss: 0.012348588556051254 2023-01-23 23:00:43.213284: step: 384/77, loss: 0.0003217856865376234 2023-01-23 23:00:44.542484: step: 388/77, loss: 0.01081857644021511 ================================================== Loss: 0.020 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5555555555555556, 'f1': 0.7000000000000001}, 'slot': {'p': 0.625, 'r': 0.01718213058419244, 'f1': 0.033444816053511704}, 'combined': 0.023411371237458196, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5555555555555556, 'f1': 0.7000000000000001}, 'slot': {'p': 0.6363636363636364, 'r': 0.01804123711340206, 'f1': 0.035087719298245605}, 'combined': 0.024561403508771926, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5555555555555556, 'f1': 0.7000000000000001}, 'slot': {'p': 0.625, 'r': 0.01718213058419244, 'f1': 0.033444816053511704}, 'combined': 0.023411371237458196, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 9 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:02:26.204320: step: 4/77, loss: 0.008130619302392006 2023-01-23 23:02:27.507277: step: 8/77, loss: 0.0005391405429691076 2023-01-23 23:02:28.807522: step: 12/77, loss: 0.011194856837391853 2023-01-23 23:02:30.095650: step: 16/77, loss: 0.007317467126995325 2023-01-23 23:02:31.448798: step: 20/77, loss: 0.0438869409263134 2023-01-23 23:02:32.757282: step: 24/77, loss: 0.008051110431551933 2023-01-23 23:02:34.058956: step: 28/77, loss: 0.012669816613197327 2023-01-23 
23:02:35.311299: step: 32/77, loss: 0.0022855617571622133 2023-01-23 23:02:36.602997: step: 36/77, loss: 0.0064675528556108475 2023-01-23 23:02:37.882506: step: 40/77, loss: 0.001550829503685236 2023-01-23 23:02:39.249001: step: 44/77, loss: 0.01563188061118126 2023-01-23 23:02:40.542497: step: 48/77, loss: 0.008327081799507141 2023-01-23 23:02:41.822300: step: 52/77, loss: 0.008023286238312721 2023-01-23 23:02:43.147512: step: 56/77, loss: 0.003052850253880024 2023-01-23 23:02:44.437164: step: 60/77, loss: 0.04474394768476486 2023-01-23 23:02:45.748516: step: 64/77, loss: 0.009582719765603542 2023-01-23 23:02:47.077824: step: 68/77, loss: 0.01214680913835764 2023-01-23 23:02:48.379185: step: 72/77, loss: 0.0022618744987994432 2023-01-23 23:02:49.694585: step: 76/77, loss: 0.03842214122414589 2023-01-23 23:02:50.989266: step: 80/77, loss: 0.12139374017715454 2023-01-23 23:02:52.365952: step: 84/77, loss: 0.0024616678711026907 2023-01-23 23:02:53.637717: step: 88/77, loss: 0.009868036024272442 2023-01-23 23:02:54.991698: step: 92/77, loss: 0.003964670468121767 2023-01-23 23:02:56.307134: step: 96/77, loss: 0.019232330843806267 2023-01-23 23:02:57.584960: step: 100/77, loss: 0.011210390366613865 2023-01-23 23:02:58.881284: step: 104/77, loss: 0.05151809751987457 2023-01-23 23:03:00.188009: step: 108/77, loss: 0.06597165763378143 2023-01-23 23:03:01.467729: step: 112/77, loss: 0.003417958738282323 2023-01-23 23:03:02.742142: step: 116/77, loss: 0.0020162255968898535 2023-01-23 23:03:04.034439: step: 120/77, loss: 0.03625112771987915 2023-01-23 23:03:05.358691: step: 124/77, loss: 0.021857159212231636 2023-01-23 23:03:06.679622: step: 128/77, loss: 0.015903670340776443 2023-01-23 23:03:07.957511: step: 132/77, loss: 0.004454844631254673 2023-01-23 23:03:09.334217: step: 136/77, loss: 0.011321873404085636 2023-01-23 23:03:10.639578: step: 140/77, loss: 0.07448364049196243 2023-01-23 23:03:11.904334: step: 144/77, loss: 0.025326918810606003 2023-01-23 23:03:13.222117: step: 148/77, loss: 0.00021000817650929093 2023-01-23 23:03:14.549324: step: 152/77, loss: 0.0007694442756474018 2023-01-23 23:03:15.865831: step: 156/77, loss: 0.08295883983373642 2023-01-23 23:03:17.154937: step: 160/77, loss: 0.0010246189776808023 2023-01-23 23:03:18.503609: step: 164/77, loss: 0.016965247690677643 2023-01-23 23:03:19.810256: step: 168/77, loss: 0.011164311319589615 2023-01-23 23:03:21.135263: step: 172/77, loss: 0.0428466722369194 2023-01-23 23:03:22.459322: step: 176/77, loss: 0.043888527899980545 2023-01-23 23:03:23.817419: step: 180/77, loss: 0.03227125480771065 2023-01-23 23:03:25.087256: step: 184/77, loss: 0.01470979955047369 2023-01-23 23:03:26.418957: step: 188/77, loss: 0.007358902599662542 2023-01-23 23:03:27.726401: step: 192/77, loss: 0.02829001471400261 2023-01-23 23:03:28.997726: step: 196/77, loss: 0.006120836362242699 2023-01-23 23:03:30.289033: step: 200/77, loss: 0.017666509374976158 2023-01-23 23:03:31.600139: step: 204/77, loss: 0.007887563668191433 2023-01-23 23:03:32.961216: step: 208/77, loss: 0.07387572526931763 2023-01-23 23:03:34.270802: step: 212/77, loss: 0.00548297306522727 2023-01-23 23:03:35.558112: step: 216/77, loss: 0.14394663274288177 2023-01-23 23:03:36.872595: step: 220/77, loss: 0.0017406389815732837 2023-01-23 23:03:38.199407: step: 224/77, loss: 0.008252009749412537 2023-01-23 23:03:39.504100: step: 228/77, loss: 0.005103731993585825 2023-01-23 23:03:40.811568: step: 232/77, loss: 0.015911363065242767 2023-01-23 23:03:42.121206: step: 236/77, loss: 0.0036538285203278065 
2023-01-23 23:03:43.458527: step: 240/77, loss: 0.0744553878903389 2023-01-23 23:03:44.758182: step: 244/77, loss: 0.0005846361164003611 2023-01-23 23:03:46.042135: step: 248/77, loss: 0.003879360156133771 2023-01-23 23:03:47.413882: step: 252/77, loss: 4.1820159822236747e-05 2023-01-23 23:03:48.716792: step: 256/77, loss: 0.05694466084241867 2023-01-23 23:03:50.014504: step: 260/77, loss: 0.00016159679216798395 2023-01-23 23:03:51.359130: step: 264/77, loss: 0.003600452793762088 2023-01-23 23:03:52.657324: step: 268/77, loss: 0.005282798781991005 2023-01-23 23:03:53.985719: step: 272/77, loss: 0.1386595070362091 2023-01-23 23:03:55.287881: step: 276/77, loss: 0.10414771735668182 2023-01-23 23:03:56.648511: step: 280/77, loss: 0.006839843932539225 2023-01-23 23:03:57.960230: step: 284/77, loss: 0.0015076743438839912 2023-01-23 23:03:59.297913: step: 288/77, loss: 0.004393836483359337 2023-01-23 23:04:00.618279: step: 292/77, loss: 0.007599594071507454 2023-01-23 23:04:01.943879: step: 296/77, loss: 0.03950433060526848 2023-01-23 23:04:03.265268: step: 300/77, loss: 0.009844477288424969 2023-01-23 23:04:04.591308: step: 304/77, loss: 0.0013913114089518785 2023-01-23 23:04:05.870416: step: 308/77, loss: 0.12179585546255112 2023-01-23 23:04:07.174128: step: 312/77, loss: 0.005494131240993738 2023-01-23 23:04:08.467417: step: 316/77, loss: 0.09123729169368744 2023-01-23 23:04:09.799418: step: 320/77, loss: 0.011966943740844727 2023-01-23 23:04:11.129823: step: 324/77, loss: 0.00355300260707736 2023-01-23 23:04:12.481097: step: 328/77, loss: 0.06924528628587723 2023-01-23 23:04:13.825911: step: 332/77, loss: 0.13030168414115906 2023-01-23 23:04:15.156582: step: 336/77, loss: 0.04786805436015129 2023-01-23 23:04:16.469370: step: 340/77, loss: 0.05078596621751785 2023-01-23 23:04:17.845812: step: 344/77, loss: 0.011326906271278858 2023-01-23 23:04:19.147442: step: 348/77, loss: 0.028522998094558716 2023-01-23 23:04:20.494786: step: 352/77, loss: 0.06331083178520203 2023-01-23 23:04:21.801337: step: 356/77, loss: 0.003408285556361079 2023-01-23 23:04:23.146786: step: 360/77, loss: 0.011288869194686413 2023-01-23 23:04:24.474391: step: 364/77, loss: 0.030668139457702637 2023-01-23 23:04:25.772569: step: 368/77, loss: 0.0004308921634219587 2023-01-23 23:04:27.049420: step: 372/77, loss: 0.00032320560421794653 2023-01-23 23:04:28.333033: step: 376/77, loss: 0.045046959072351456 2023-01-23 23:04:29.676912: step: 380/77, loss: 0.11515285074710846 2023-01-23 23:04:30.987609: step: 384/77, loss: 0.0003654747852124274 2023-01-23 23:04:32.378294: step: 388/77, loss: 0.021653490141034126 ================================================== Loss: 0.027 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 9} Test Chinese: {'template': {'p': 0.984375, 'r': 0.5, 'f1': 0.6631578947368421}, 'slot': {'p': 0.72, 'r': 0.015463917525773196, 'f1': 0.030277544154751895}, 'combined': 0.020078792439467044, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 9} Test Korean: {'template': {'p': 0.9841269841269841, 'r': 0.49206349206349204, 'f1': 0.656084656084656}, 'slot': {'p': 0.72, 'r': 0.015463917525773196, 'f1': 0.030277544154751895}, 'combined': 0.019864632143858384, 'epoch': 9} Dev Russian: 
{'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 9} Test Russian: {'template': {'p': 0.9841269841269841, 'r': 0.49206349206349204, 'f1': 0.656084656084656}, 'slot': {'p': 0.72, 'r': 0.015463917525773196, 'f1': 0.030277544154751895}, 'combined': 0.019864632143858384, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 10 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:06:14.307770: step: 4/77, loss: 0.028790917247533798 2023-01-23 23:06:15.593758: step: 8/77, loss: 0.006226943340152502 2023-01-23 23:06:16.914989: step: 12/77, loss: 0.004163344856351614 2023-01-23 23:06:18.195440: step: 16/77, loss: 0.0036746724508702755 2023-01-23 23:06:19.513367: step: 20/77, loss: 0.052779559046030045 2023-01-23 23:06:20.828851: step: 24/77, loss: 0.04778440296649933 
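Note: each evaluation block reports 'template' and 'slot' as precision/recall/F1 dicts plus a single 'combined' number. Judging from the logged values, each f1 is the usual harmonic mean of p and r, and 'combined' equals template F1 times slot F1. The snippet below (the helper name f1 is ours, not from the repo) reproduces the epoch-9 Dev Chinese entry above.

def f1(p, r):
    # Standard F1: harmonic mean of precision and recall (0 when both are 0).
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

# Epoch-9 Dev Chinese values copied from the log above.
template_f1 = f1(1.0, 0.55)                    # 0.7096774193548387
slot_f1 = f1(0.5, 0.034026465028355386)        # 0.06371681415929203
combined = template_f1 * slot_f1               # 0.04521838424207822, matching 'combined'
print(template_f1, slot_f1, combined)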
2023-01-23 23:06:22.103836: step: 28/77, loss: 0.023326139897108078 2023-01-23 23:06:23.360115: step: 32/77, loss: 0.0023849578574299812 2023-01-23 23:06:24.632063: step: 36/77, loss: 0.012747220695018768 2023-01-23 23:06:25.892200: step: 40/77, loss: 0.0034248887095600367 2023-01-23 23:06:27.207135: step: 44/77, loss: 0.0010681729763746262 2023-01-23 23:06:28.468664: step: 48/77, loss: 0.007092490326613188 2023-01-23 23:06:29.745584: step: 52/77, loss: 0.028929613530635834 2023-01-23 23:06:31.022875: step: 56/77, loss: 0.003579456824809313 2023-01-23 23:06:32.397346: step: 60/77, loss: 0.007527029141783714 2023-01-23 23:06:33.698971: step: 64/77, loss: 0.031638361513614655 2023-01-23 23:06:34.972864: step: 68/77, loss: 0.02865629829466343 2023-01-23 23:06:36.235653: step: 72/77, loss: 0.03119928203523159 2023-01-23 23:06:37.584035: step: 76/77, loss: 0.0003292355395387858 2023-01-23 23:06:38.867171: step: 80/77, loss: 0.010797802358865738 2023-01-23 23:06:40.170189: step: 84/77, loss: 0.0030798588413745165 2023-01-23 23:06:41.444203: step: 88/77, loss: 0.0003887184429913759 2023-01-23 23:06:42.730358: step: 92/77, loss: 0.009819199331104755 2023-01-23 23:06:44.061130: step: 96/77, loss: 0.0035490067675709724 2023-01-23 23:06:45.392515: step: 100/77, loss: 0.0005593973910436034 2023-01-23 23:06:46.704568: step: 104/77, loss: 0.004988205153495073 2023-01-23 23:06:48.061543: step: 108/77, loss: 0.0018749493174254894 2023-01-23 23:06:49.362640: step: 112/77, loss: 0.01050441525876522 2023-01-23 23:06:50.690611: step: 116/77, loss: 0.0016917268512770534 2023-01-23 23:06:52.019680: step: 120/77, loss: 0.013261470943689346 2023-01-23 23:06:53.316680: step: 124/77, loss: 0.0017507218290120363 2023-01-23 23:06:54.667111: step: 128/77, loss: 0.006236909423023462 2023-01-23 23:06:55.952962: step: 132/77, loss: 0.021041784435510635 2023-01-23 23:06:57.219596: step: 136/77, loss: 0.03406795114278793 2023-01-23 23:06:58.511553: step: 140/77, loss: 0.026158465072512627 2023-01-23 23:06:59.800429: step: 144/77, loss: 0.002985805505886674 2023-01-23 23:07:01.108425: step: 148/77, loss: 0.004404544830322266 2023-01-23 23:07:02.432178: step: 152/77, loss: 0.004968182649463415 2023-01-23 23:07:03.757193: step: 156/77, loss: 0.03685709089040756 2023-01-23 23:07:05.070141: step: 160/77, loss: 0.002346716821193695 2023-01-23 23:07:06.378557: step: 164/77, loss: 6.125450454419479e-05 2023-01-23 23:07:07.749025: step: 168/77, loss: 0.006016855128109455 2023-01-23 23:07:09.120523: step: 172/77, loss: 0.01733742654323578 2023-01-23 23:07:10.471115: step: 176/77, loss: 0.08007363975048065 2023-01-23 23:07:11.784069: step: 180/77, loss: 0.013454221189022064 2023-01-23 23:07:13.090132: step: 184/77, loss: 0.023909416049718857 2023-01-23 23:07:14.398518: step: 188/77, loss: 0.01868433691561222 2023-01-23 23:07:15.670346: step: 192/77, loss: 0.0005307840183377266 2023-01-23 23:07:16.974655: step: 196/77, loss: 0.01805320382118225 2023-01-23 23:07:18.265348: step: 200/77, loss: 0.0010465634986758232 2023-01-23 23:07:19.562693: step: 204/77, loss: 0.03729572519659996 2023-01-23 23:07:20.840447: step: 208/77, loss: 0.014979223720729351 2023-01-23 23:07:22.110302: step: 212/77, loss: 0.03879925236105919 2023-01-23 23:07:23.397024: step: 216/77, loss: 0.02371007576584816 2023-01-23 23:07:24.674626: step: 220/77, loss: 0.007413984276354313 2023-01-23 23:07:26.001086: step: 224/77, loss: 4.5996031985851005e-05 2023-01-23 23:07:27.303964: step: 228/77, loss: 0.009440948255360126 2023-01-23 23:07:28.576602: step: 232/77, loss: 
0.02789357490837574 2023-01-23 23:07:29.857229: step: 236/77, loss: 0.0020327758975327015 2023-01-23 23:07:31.157438: step: 240/77, loss: 0.005369645543396473 2023-01-23 23:07:32.416260: step: 244/77, loss: 0.010713733732700348 2023-01-23 23:07:33.733101: step: 248/77, loss: 0.0014789579436182976 2023-01-23 23:07:35.027364: step: 252/77, loss: 0.00473722442984581 2023-01-23 23:07:36.321990: step: 256/77, loss: 0.014124809764325619 2023-01-23 23:07:37.658829: step: 260/77, loss: 0.01643744297325611 2023-01-23 23:07:38.962546: step: 264/77, loss: 0.0024889023043215275 2023-01-23 23:07:40.287905: step: 268/77, loss: 0.003109875600785017 2023-01-23 23:07:41.549332: step: 272/77, loss: 0.012875061482191086 2023-01-23 23:07:42.876084: step: 276/77, loss: 0.0017921538092195988 2023-01-23 23:07:44.188145: step: 280/77, loss: 0.01875830627977848 2023-01-23 23:07:45.500059: step: 284/77, loss: 0.003374907886609435 2023-01-23 23:07:46.810163: step: 288/77, loss: 0.0016585986595600843 2023-01-23 23:07:48.140626: step: 292/77, loss: 0.00521429255604744 2023-01-23 23:07:49.465705: step: 296/77, loss: 0.019124671816825867 2023-01-23 23:07:50.802907: step: 300/77, loss: 0.009553453885018826 2023-01-23 23:07:52.113983: step: 304/77, loss: 0.0073772757314145565 2023-01-23 23:07:53.406041: step: 308/77, loss: 0.011843051761388779 2023-01-23 23:07:54.701807: step: 312/77, loss: 0.03154919296503067 2023-01-23 23:07:56.053785: step: 316/77, loss: 0.06556575000286102 2023-01-23 23:07:57.356519: step: 320/77, loss: 0.04244726896286011 2023-01-23 23:07:58.669473: step: 324/77, loss: 0.006225625053048134 2023-01-23 23:07:59.969214: step: 328/77, loss: 0.0034917141310870647 2023-01-23 23:08:01.256815: step: 332/77, loss: 0.010496101342141628 2023-01-23 23:08:02.618964: step: 336/77, loss: 0.024124911054968834 2023-01-23 23:08:03.892502: step: 340/77, loss: 0.007168466225266457 2023-01-23 23:08:05.189751: step: 344/77, loss: 0.004895597230643034 2023-01-23 23:08:06.469052: step: 348/77, loss: 0.06963392347097397 2023-01-23 23:08:07.752514: step: 352/77, loss: 0.025621812790632248 2023-01-23 23:08:09.082726: step: 356/77, loss: 0.010994499549269676 2023-01-23 23:08:10.375271: step: 360/77, loss: 0.0767049491405487 2023-01-23 23:08:11.686122: step: 364/77, loss: 0.02383904531598091 2023-01-23 23:08:13.046722: step: 368/77, loss: 0.004664411302655935 2023-01-23 23:08:14.357898: step: 372/77, loss: 0.009664268232882023 2023-01-23 23:08:15.654984: step: 376/77, loss: 0.04587193951010704 2023-01-23 23:08:16.967216: step: 380/77, loss: 0.0028117981273680925 2023-01-23 23:08:18.244531: step: 384/77, loss: 0.00421349611133337 2023-01-23 23:08:19.552378: step: 388/77, loss: 0.008075060322880745 ================================================== Loss: 0.015 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Chinese: {'template': {'p': 0.9538461538461539, 'r': 0.49206349206349204, 'f1': 0.6492146596858639}, 'slot': {'p': 0.625, 'r': 0.01718213058419244, 'f1': 0.033444816053511704}, 'combined': 0.02171286487243692, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Korean: {'template': {'p': 0.9538461538461539, 'r': 0.49206349206349204, 'f1': 0.6492146596858639}, 'slot': {'p': 
0.65625, 'r': 0.01804123711340206, 'f1': 0.03511705685618729}, 'combined': 0.022798508116058765, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Russian: {'template': {'p': 0.953125, 'r': 0.48412698412698413, 'f1': 0.6421052631578947}, 'slot': {'p': 0.625, 'r': 0.01718213058419244, 'f1': 0.033444816053511704}, 'combined': 0.021475092413307514, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 11 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:10:01.249067: step: 4/77, loss: 0.015952421352267265 2023-01-23 23:10:02.538346: step: 8/77, loss: 0.00025507123791612685 2023-01-23 23:10:03.831602: step: 12/77, loss: 0.0044345613569021225 2023-01-23 23:10:05.098927: step: 16/77, loss: 0.0034365570172667503 
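Note: the "Current best result" block keeps reporting epoch 2 even though epochs 7-12 tie it on the dev 'combined' score, which suggests the stored best is only replaced on a strict improvement of that metric. The sketch below is a guess at that bookkeeping; the function and dict names are hypothetical, and the real selection logic in train.py is not shown in this log.

best = {}  # language -> {'dev': ..., 'test': ..., 'sample': ...}

def maybe_update_best(language, dev_scores, test_scores, sample_scores):
    # Replace the stored best only on a strict improvement of dev 'combined',
    # so an epoch that merely ties (as epochs 7-12 do here) leaves epoch 2 in place.
    prev = best.get(language)
    if prev is None or dev_scores['combined'] > prev['dev']['combined']:
        best[language] = {'dev': dev_scores, 'test': test_scores, 'sample': sample_scores}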
2023-01-23 23:10:06.374829: step: 20/77, loss: 0.00975167378783226 2023-01-23 23:10:07.713282: step: 24/77, loss: 0.0033859300892800093 2023-01-23 23:10:09.057070: step: 28/77, loss: 0.0035383105278015137 2023-01-23 23:10:10.381995: step: 32/77, loss: 0.02958233840763569 2023-01-23 23:10:11.695309: step: 36/77, loss: 0.006568643264472485 2023-01-23 23:10:13.011491: step: 40/77, loss: 0.007371163927018642 2023-01-23 23:10:14.281716: step: 44/77, loss: 0.026065394282341003 2023-01-23 23:10:15.568972: step: 48/77, loss: 0.12206071615219116 2023-01-23 23:10:16.865013: step: 52/77, loss: 0.019574757665395737 2023-01-23 23:10:18.131077: step: 56/77, loss: 0.004011745098978281 2023-01-23 23:10:19.475896: step: 60/77, loss: 0.00427105650305748 2023-01-23 23:10:20.825911: step: 64/77, loss: 0.045897383242845535 2023-01-23 23:10:22.118599: step: 68/77, loss: 0.0007205940200947225 2023-01-23 23:10:23.448228: step: 72/77, loss: 0.014391254633665085 2023-01-23 23:10:24.757501: step: 76/77, loss: 0.00022145872935652733 2023-01-23 23:10:26.089798: step: 80/77, loss: 0.008357521146535873 2023-01-23 23:10:27.417982: step: 84/77, loss: 0.04798451066017151 2023-01-23 23:10:28.710985: step: 88/77, loss: 0.019816333428025246 2023-01-23 23:10:29.948735: step: 92/77, loss: 0.030351610854268074 2023-01-23 23:10:31.256902: step: 96/77, loss: 0.002286176895722747 2023-01-23 23:10:32.629765: step: 100/77, loss: 0.02866881527006626 2023-01-23 23:10:33.906195: step: 104/77, loss: 0.01667320542037487 2023-01-23 23:10:35.225932: step: 108/77, loss: 0.01816210336983204 2023-01-23 23:10:36.529434: step: 112/77, loss: 0.009158037602901459 2023-01-23 23:10:37.846489: step: 116/77, loss: 0.023456105962395668 2023-01-23 23:10:39.130301: step: 120/77, loss: 0.001066570752300322 2023-01-23 23:10:40.438390: step: 124/77, loss: 0.02048538438975811 2023-01-23 23:10:41.752064: step: 128/77, loss: 0.01709677465260029 2023-01-23 23:10:43.073323: step: 132/77, loss: 0.00894573051482439 2023-01-23 23:10:44.401872: step: 136/77, loss: 0.00696770241484046 2023-01-23 23:10:45.681905: step: 140/77, loss: 0.019813766703009605 2023-01-23 23:10:47.063217: step: 144/77, loss: 0.013972686603665352 2023-01-23 23:10:48.411318: step: 148/77, loss: 0.025065403431653976 2023-01-23 23:10:49.740131: step: 152/77, loss: 0.011014866642653942 2023-01-23 23:10:51.036093: step: 156/77, loss: 0.02622900903224945 2023-01-23 23:10:52.303363: step: 160/77, loss: 0.0027283949311822653 2023-01-23 23:10:53.591276: step: 164/77, loss: 2.7721842343453318e-05 2023-01-23 23:10:54.913204: step: 168/77, loss: 0.025296106934547424 2023-01-23 23:10:56.181183: step: 172/77, loss: 0.004019709303975105 2023-01-23 23:10:57.500348: step: 176/77, loss: 0.009642662480473518 2023-01-23 23:10:58.800419: step: 180/77, loss: 0.007437935099005699 2023-01-23 23:11:00.130527: step: 184/77, loss: 0.00015852319484110922 2023-01-23 23:11:01.479764: step: 188/77, loss: 0.008070101030170918 2023-01-23 23:11:02.808496: step: 192/77, loss: 0.005517785437405109 2023-01-23 23:11:04.144340: step: 196/77, loss: 0.009717223234474659 2023-01-23 23:11:05.440966: step: 200/77, loss: 0.16438964009284973 2023-01-23 23:11:06.759999: step: 204/77, loss: 0.0046204449608922005 2023-01-23 23:11:08.050560: step: 208/77, loss: 0.004777129739522934 2023-01-23 23:11:09.379966: step: 212/77, loss: 0.0024582187179476023 2023-01-23 23:11:10.661674: step: 216/77, loss: 0.0029221922159194946 2023-01-23 23:11:11.996799: step: 220/77, loss: 0.032576870173215866 2023-01-23 23:11:13.289621: step: 224/77, loss: 
0.019355500116944313 2023-01-23 23:11:14.629607: step: 228/77, loss: 0.013398583978414536 2023-01-23 23:11:15.987153: step: 232/77, loss: 5.370080907596275e-05 2023-01-23 23:11:17.279168: step: 236/77, loss: 0.014454700984060764 2023-01-23 23:11:18.572336: step: 240/77, loss: 0.05621044710278511 2023-01-23 23:11:19.851980: step: 244/77, loss: 0.009606147184967995 2023-01-23 23:11:21.140503: step: 248/77, loss: 0.007845384068787098 2023-01-23 23:11:22.508702: step: 252/77, loss: 0.009262886829674244 2023-01-23 23:11:23.838543: step: 256/77, loss: 0.08676369488239288 2023-01-23 23:11:25.143948: step: 260/77, loss: 0.01982366479933262 2023-01-23 23:11:26.478041: step: 264/77, loss: 0.06730242818593979 2023-01-23 23:11:27.830904: step: 268/77, loss: 0.0041349404491484165 2023-01-23 23:11:29.122539: step: 272/77, loss: 0.004261372610926628 2023-01-23 23:11:30.465031: step: 276/77, loss: 0.015395049005746841 2023-01-23 23:11:31.760035: step: 280/77, loss: 0.005712614394724369 2023-01-23 23:11:33.137456: step: 284/77, loss: 0.00669367890805006 2023-01-23 23:11:34.440271: step: 288/77, loss: 0.03226980194449425 2023-01-23 23:11:35.730856: step: 292/77, loss: 0.00047052899026311934 2023-01-23 23:11:37.050777: step: 296/77, loss: 0.005096997134387493 2023-01-23 23:11:38.372668: step: 300/77, loss: 0.03474399447441101 2023-01-23 23:11:39.638626: step: 304/77, loss: 0.00014382918016053736 2023-01-23 23:11:40.961905: step: 308/77, loss: 0.0004964787513017654 2023-01-23 23:11:42.246033: step: 312/77, loss: 0.014115167781710625 2023-01-23 23:11:43.550364: step: 316/77, loss: 0.08715243637561798 2023-01-23 23:11:44.872688: step: 320/77, loss: 0.007384343538433313 2023-01-23 23:11:46.168239: step: 324/77, loss: 0.05926787108182907 2023-01-23 23:11:47.502995: step: 328/77, loss: 0.0005082663847133517 2023-01-23 23:11:48.805627: step: 332/77, loss: 0.008011923171579838 2023-01-23 23:11:50.179530: step: 336/77, loss: 0.02457595057785511 2023-01-23 23:11:51.498074: step: 340/77, loss: 0.05150618776679039 2023-01-23 23:11:52.811547: step: 344/77, loss: 0.010762691497802734 2023-01-23 23:11:54.121372: step: 348/77, loss: 0.016093550249934196 2023-01-23 23:11:55.417765: step: 352/77, loss: 0.002383376471698284 2023-01-23 23:11:56.708592: step: 356/77, loss: 0.005355801433324814 2023-01-23 23:11:58.028116: step: 360/77, loss: 0.011093618348240852 2023-01-23 23:11:59.351429: step: 364/77, loss: 7.855845615267754e-05 2023-01-23 23:12:00.680551: step: 368/77, loss: 7.87553217378445e-05 2023-01-23 23:12:02.038037: step: 372/77, loss: 0.00825677439570427 2023-01-23 23:12:03.376847: step: 376/77, loss: 0.051966484636068344 2023-01-23 23:12:04.706901: step: 380/77, loss: 0.01855085790157318 2023-01-23 23:12:06.007499: step: 384/77, loss: 0.02410942129790783 2023-01-23 23:12:07.329009: step: 388/77, loss: 8.27374606160447e-05 ================================================== Loss: 0.018 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Chinese: {'template': {'p': 0.9583333333333334, 'r': 0.5476190476190477, 'f1': 0.696969696969697}, 'slot': {'p': 0.5882352941176471, 'r': 0.01718213058419244, 'f1': 0.0333889816360601}, 'combined': 0.023271108413011585, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 
0.05179909351586346, 'epoch': 11} Test Korean: {'template': {'p': 0.9583333333333334, 'r': 0.5476190476190477, 'f1': 0.696969696969697}, 'slot': {'p': 0.5882352941176471, 'r': 0.01718213058419244, 'f1': 0.0333889816360601}, 'combined': 0.023271108413011585, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Russian: {'template': {'p': 0.9583333333333334, 'r': 0.5476190476190477, 'f1': 0.696969696969697}, 'slot': {'p': 0.5882352941176471, 'r': 0.01718213058419244, 'f1': 0.0333889816360601}, 'combined': 0.023271108413011585, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 12 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:13:49.075809: step: 4/77, loss: 0.0001822328777052462 2023-01-23 23:13:50.372898: 
step: 8/77, loss: 0.0008595406543463469 2023-01-23 23:13:51.648998: step: 12/77, loss: 0.0023720674216747284 2023-01-23 23:13:52.919939: step: 16/77, loss: 0.0035153913777321577 2023-01-23 23:13:54.241756: step: 20/77, loss: 0.0032651075161993504 2023-01-23 23:13:55.564693: step: 24/77, loss: 0.006483836565166712 2023-01-23 23:13:56.864962: step: 28/77, loss: 0.0008740816847421229 2023-01-23 23:13:58.200417: step: 32/77, loss: 0.03994395583868027 2023-01-23 23:13:59.503481: step: 36/77, loss: 0.000109150554635562 2023-01-23 23:14:00.790607: step: 40/77, loss: 0.000269919604761526 2023-01-23 23:14:02.119875: step: 44/77, loss: 0.012205511331558228 2023-01-23 23:14:03.427493: step: 48/77, loss: 0.001035436987876892 2023-01-23 23:14:04.699356: step: 52/77, loss: 0.018521424382925034 2023-01-23 23:14:05.978176: step: 56/77, loss: 0.013126850128173828 2023-01-23 23:14:07.275255: step: 60/77, loss: 0.005567711777985096 2023-01-23 23:14:08.578849: step: 64/77, loss: 0.03030678629875183 2023-01-23 23:14:09.882002: step: 68/77, loss: 0.003879829775542021 2023-01-23 23:14:11.173575: step: 72/77, loss: 0.025746623054146767 2023-01-23 23:14:12.522504: step: 76/77, loss: 0.011330823414027691 2023-01-23 23:14:13.827740: step: 80/77, loss: 0.0010732869850471616 2023-01-23 23:14:15.180302: step: 84/77, loss: 0.06254440546035767 2023-01-23 23:14:16.507805: step: 88/77, loss: 0.000326181179843843 2023-01-23 23:14:17.791120: step: 92/77, loss: 0.053866542875766754 2023-01-23 23:14:19.095508: step: 96/77, loss: 0.002246356336399913 2023-01-23 23:14:20.361531: step: 100/77, loss: 0.012330463156104088 2023-01-23 23:14:21.631844: step: 104/77, loss: 0.03553637117147446 2023-01-23 23:14:22.886969: step: 108/77, loss: 0.00018002027354668826 2023-01-23 23:14:24.159605: step: 112/77, loss: 0.023753199726343155 2023-01-23 23:14:25.454096: step: 116/77, loss: 0.001026954036206007 2023-01-23 23:14:26.777769: step: 120/77, loss: 0.022113390266895294 2023-01-23 23:14:28.108385: step: 124/77, loss: 0.0218508280813694 2023-01-23 23:14:29.377109: step: 128/77, loss: 0.0010070583084598184 2023-01-23 23:14:30.682147: step: 132/77, loss: 0.00040107598761096597 2023-01-23 23:14:32.004231: step: 136/77, loss: 0.04117031395435333 2023-01-23 23:14:33.270509: step: 140/77, loss: 0.00023497387883253396 2023-01-23 23:14:34.551166: step: 144/77, loss: 0.014612888917326927 2023-01-23 23:14:35.818412: step: 148/77, loss: 0.0343783013522625 2023-01-23 23:14:37.117109: step: 152/77, loss: 0.0012751361355185509 2023-01-23 23:14:38.482023: step: 156/77, loss: 0.028205927461385727 2023-01-23 23:14:39.803768: step: 160/77, loss: 0.0010938920313492417 2023-01-23 23:14:41.159401: step: 164/77, loss: 0.00888506043702364 2023-01-23 23:14:42.417833: step: 168/77, loss: 0.0007671800558455288 2023-01-23 23:14:43.725119: step: 172/77, loss: 0.018024412915110588 2023-01-23 23:14:45.005445: step: 176/77, loss: 0.00041163599235005677 2023-01-23 23:14:46.307113: step: 180/77, loss: 0.028581075370311737 2023-01-23 23:14:47.591646: step: 184/77, loss: 0.007999785244464874 2023-01-23 23:14:48.893610: step: 188/77, loss: 0.0047058360651135445 2023-01-23 23:14:50.207639: step: 192/77, loss: 0.007076134905219078 2023-01-23 23:14:51.540762: step: 196/77, loss: 0.021052701398730278 2023-01-23 23:14:52.905074: step: 200/77, loss: 0.09484566748142242 2023-01-23 23:14:54.246509: step: 204/77, loss: 0.0005660290480591357 2023-01-23 23:14:55.536386: step: 208/77, loss: 0.02472056820988655 2023-01-23 23:14:56.823686: step: 212/77, loss: 0.006819822359830141 2023-01-23 
23:14:58.149932: step: 216/77, loss: 0.0009203726658597589 2023-01-23 23:14:59.448378: step: 220/77, loss: 0.001978323794901371 2023-01-23 23:15:00.766229: step: 224/77, loss: 0.0022479835897684097 2023-01-23 23:15:02.089570: step: 228/77, loss: 0.0005001539830118418 2023-01-23 23:15:03.435373: step: 232/77, loss: 0.0036140254233032465 2023-01-23 23:15:04.685639: step: 236/77, loss: 0.018563125282526016 2023-01-23 23:15:05.993134: step: 240/77, loss: 0.00011719368922058493 2023-01-23 23:15:07.243002: step: 244/77, loss: 0.0018171144183725119 2023-01-23 23:15:08.548804: step: 248/77, loss: 3.602403012337163e-05 2023-01-23 23:15:09.866808: step: 252/77, loss: 2.7164055609318893e-06 2023-01-23 23:15:11.124227: step: 256/77, loss: 0.0027795173227787018 2023-01-23 23:15:12.406654: step: 260/77, loss: 0.05980942025780678 2023-01-23 23:15:13.718801: step: 264/77, loss: 0.002729016589000821 2023-01-23 23:15:15.092039: step: 268/77, loss: 0.0014211706584319472 2023-01-23 23:15:16.407413: step: 272/77, loss: 0.0069159846752882 2023-01-23 23:15:17.736293: step: 276/77, loss: 2.6452304155100137e-05 2023-01-23 23:15:19.033066: step: 280/77, loss: 0.0017069733003154397 2023-01-23 23:15:20.393522: step: 284/77, loss: 0.05138189718127251 2023-01-23 23:15:21.639644: step: 288/77, loss: 0.007211971562355757 2023-01-23 23:15:22.880661: step: 292/77, loss: 0.001633265521377325 2023-01-23 23:15:24.152057: step: 296/77, loss: 0.037424977868795395 2023-01-23 23:15:25.467037: step: 300/77, loss: 0.001969374716281891 2023-01-23 23:15:26.782222: step: 304/77, loss: 0.0008761576609686017 2023-01-23 23:15:28.046684: step: 308/77, loss: 0.0007793945842422545 2023-01-23 23:15:29.372969: step: 312/77, loss: 0.018580691888928413 2023-01-23 23:15:30.625117: step: 316/77, loss: 0.0016993844183161855 2023-01-23 23:15:31.864214: step: 320/77, loss: 0.0018329013837501407 2023-01-23 23:15:33.164303: step: 324/77, loss: 0.0004931208095513284 2023-01-23 23:15:34.512324: step: 328/77, loss: 8.924649591790512e-05 2023-01-23 23:15:35.800430: step: 332/77, loss: 0.03662414103746414 2023-01-23 23:15:37.128945: step: 336/77, loss: 0.025226496160030365 2023-01-23 23:15:38.368851: step: 340/77, loss: 0.013443954288959503 2023-01-23 23:15:39.681592: step: 344/77, loss: 0.04253612831234932 2023-01-23 23:15:41.000687: step: 348/77, loss: 0.008895068429410458 2023-01-23 23:15:42.346889: step: 352/77, loss: 0.008363965898752213 2023-01-23 23:15:43.642894: step: 356/77, loss: 0.0023347344249486923 2023-01-23 23:15:44.951756: step: 360/77, loss: 0.0045925406739115715 2023-01-23 23:15:46.241382: step: 364/77, loss: 0.00017636397387832403 2023-01-23 23:15:47.575781: step: 368/77, loss: 0.009718219749629498 2023-01-23 23:15:48.916897: step: 372/77, loss: 0.009809928946197033 2023-01-23 23:15:50.216117: step: 376/77, loss: 4.1718150896485895e-05 2023-01-23 23:15:51.508996: step: 380/77, loss: 0.034828729927539825 2023-01-23 23:15:52.839830: step: 384/77, loss: 0.005780613049864769 2023-01-23 23:15:54.115907: step: 388/77, loss: 0.0001409717951901257 ================================================== Loss: 0.012 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Chinese: {'template': {'p': 0.9696969696969697, 'r': 0.5079365079365079, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.01804123711340206, 'f1': 0.03499999999999999}, 'combined': 
0.023333333333333324, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Korean: {'template': {'p': 0.9696969696969697, 'r': 0.5079365079365079, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.01804123711340206, 'f1': 0.03499999999999999}, 'combined': 0.023333333333333324, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Russian: {'template': {'p': 0.9701492537313433, 'r': 0.5158730158730159, 'f1': 0.6735751295336788}, 'slot': {'p': 0.5675675675675675, 'r': 0.01804123711340206, 'f1': 0.03497085761865112}, 'combined': 0.023555499950386766, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 13 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 
--xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:17:35.904613: step: 4/77, loss: 0.004952570889145136 2023-01-23 23:17:37.225106: step: 8/77, loss: 0.00018064001051243395 2023-01-23 23:17:38.557105: step: 12/77, loss: 0.08218786865472794 2023-01-23 23:17:39.832101: step: 16/77, loss: 0.025318488478660583 2023-01-23 23:17:41.187101: step: 20/77, loss: 0.0022081886418163776 2023-01-23 23:17:42.537405: step: 24/77, loss: 0.017943235114216805 2023-01-23 23:17:43.834811: step: 28/77, loss: 8.053469355218112e-05 2023-01-23 23:17:45.163328: step: 32/77, loss: 0.00113627128303051 2023-01-23 23:17:46.481717: step: 36/77, loss: 4.863796493737027e-05 2023-01-23 23:17:47.777710: step: 40/77, loss: 0.013745257630944252 2023-01-23 23:17:49.062808: step: 44/77, loss: 5.027527367928997e-05 2023-01-23 23:17:50.325301: step: 48/77, loss: 0.02231896109879017 2023-01-23 23:17:51.598881: step: 52/77, loss: 0.005378996953368187 2023-01-23 23:17:52.894434: step: 56/77, loss: 0.1697475165128708 2023-01-23 23:17:54.188073: step: 60/77, loss: 0.0009837471880018711 2023-01-23 23:17:55.468609: step: 64/77, loss: 0.000497913861181587 2023-01-23 23:17:56.799288: step: 68/77, loss: 0.0005649970844388008 2023-01-23 23:17:58.120067: step: 72/77, loss: 0.005117279943078756 2023-01-23 23:17:59.411451: step: 76/77, loss: 0.0029834150336682796 2023-01-23 23:18:00.734839: step: 80/77, loss: 0.0036763963289558887 2023-01-23 23:18:02.038174: step: 84/77, loss: 0.002525686053559184 2023-01-23 23:18:03.342972: step: 88/77, loss: 0.030388498678803444 2023-01-23 23:18:04.606841: step: 92/77, loss: 0.04918051138520241 2023-01-23 23:18:05.871386: step: 96/77, loss: 0.01887078583240509 2023-01-23 23:18:07.182202: step: 100/77, loss: 0.005926585290580988 2023-01-23 23:18:08.535513: step: 104/77, loss: 0.004645318258553743 2023-01-23 23:18:09.859853: step: 108/77, loss: 0.008219257928431034 2023-01-23 23:18:11.164135: step: 112/77, loss: 0.00023844148381613195 2023-01-23 23:18:12.494159: step: 116/77, loss: 0.0098537253215909 2023-01-23 23:18:13.758829: step: 120/77, loss: 0.00490536680445075 2023-01-23 23:18:15.050638: step: 124/77, loss: 0.0024474281817674637 2023-01-23 23:18:16.380710: step: 128/77, loss: 0.014606594108045101 2023-01-23 23:18:17.702303: step: 132/77, loss: 0.00835402775555849 2023-01-23 23:18:19.012972: step: 136/77, loss: 0.3284963369369507 2023-01-23 23:18:20.378388: step: 140/77, loss: 0.003163279267027974 2023-01-23 23:18:21.721797: step: 144/77, loss: 0.0027168530505150557 2023-01-23 23:18:23.015744: step: 148/77, loss: 0.001842327183112502 2023-01-23 23:18:24.275620: step: 152/77, loss: 0.002845626324415207 2023-01-23 23:18:25.537822: step: 156/77, loss: 0.0007997804787009954 2023-01-23 23:18:26.840405: step: 160/77, loss: 0.017090700566768646 2023-01-23 23:18:28.129490: step: 164/77, loss: 0.003446019720286131 2023-01-23 23:18:29.461272: step: 168/77, loss: 0.05163341388106346 2023-01-23 23:18:30.747806: step: 172/77, loss: 0.0038207899779081345 2023-01-23 23:18:32.047377: step: 176/77, loss: 0.06080837547779083 2023-01-23 23:18:33.364562: step: 180/77, loss: 0.03154751658439636 2023-01-23 23:18:34.709803: step: 184/77, loss: 8.086367597570643e-05 2023-01-23 23:18:36.033795: step: 188/77, loss: 2.9138493118807673e-05 2023-01-23 23:18:37.332532: step: 192/77, loss: 0.022798681631684303 2023-01-23 23:18:38.669504: step: 196/77, loss: 0.02658800594508648 2023-01-23 23:18:39.952094: step: 200/77, loss: 0.05497226119041443 
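Throughout these epochs the step counter advances by 4 per logged entry while the command passes --accumulate_step 4 and --batch_size 10, which is consistent with train.py counting micro-batches and stepping the optimizer once every fourth one. A minimal, hypothetical sketch of such an accumulation loop (standard PyTorch usage, not the actual train.py internals):

def run_epoch(model, loader, optimizer, accumulate_step=4):
    """Gradient accumulation: one optimizer update per `accumulate_step` micro-batches."""
    model.train()
    optimizer.zero_grad()
    for micro_step, batch in enumerate(loader, start=1):
        loss = model(**batch)                    # assume the model returns a scalar loss
        (loss / accumulate_step).backward()      # scale so the update averages the group
        if micro_step % accumulate_step == 0:    # i.e. at micro-steps 4, 8, 12, ...
            optimizer.step()
            optimizer.zero_grad()
            print(f"step: {micro_step}, loss: {loss.item()}")  # mirrors the log's format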
2023-01-23 23:18:41.270042: step: 204/77, loss: 0.0023539061658084393 2023-01-23 23:18:42.609486: step: 208/77, loss: 0.11826759576797485 2023-01-23 23:18:43.846256: step: 212/77, loss: 0.0033361660316586494 2023-01-23 23:18:45.133986: step: 216/77, loss: 0.004443520680069923 2023-01-23 23:18:46.426354: step: 220/77, loss: 0.03105931170284748 2023-01-23 23:18:47.731015: step: 224/77, loss: 0.011646389029920101 2023-01-23 23:18:48.996106: step: 228/77, loss: 0.026408933103084564 2023-01-23 23:18:50.340679: step: 232/77, loss: 0.0027887923642992973 2023-01-23 23:18:51.644078: step: 236/77, loss: 0.005191397853195667 2023-01-23 23:18:52.947211: step: 240/77, loss: 0.0014639300061389804 2023-01-23 23:18:54.223708: step: 244/77, loss: 0.04769325628876686 2023-01-23 23:18:55.525358: step: 248/77, loss: 0.0005501174600794911 2023-01-23 23:18:56.872118: step: 252/77, loss: 0.03677041456103325 2023-01-23 23:18:58.170011: step: 256/77, loss: 0.023332320153713226 2023-01-23 23:18:59.481324: step: 260/77, loss: 0.001704951049759984 2023-01-23 23:19:00.785117: step: 264/77, loss: 0.0013226951705291867 2023-01-23 23:19:02.165312: step: 268/77, loss: 0.0008604861213825643 2023-01-23 23:19:03.501107: step: 272/77, loss: 0.0003232818271499127 2023-01-23 23:19:04.810192: step: 276/77, loss: 0.00019440895994193852 2023-01-23 23:19:06.159706: step: 280/77, loss: 0.0014171466464176774 2023-01-23 23:19:07.443555: step: 284/77, loss: 0.0032911188900470734 2023-01-23 23:19:08.786437: step: 288/77, loss: 5.382840390666388e-05 2023-01-23 23:19:10.133197: step: 292/77, loss: 0.000356964796083048 2023-01-23 23:19:11.487023: step: 296/77, loss: 0.003755199024453759 2023-01-23 23:19:12.780929: step: 300/77, loss: 0.015594424679875374 2023-01-23 23:19:14.089266: step: 304/77, loss: 0.025219272822141647 2023-01-23 23:19:15.382451: step: 308/77, loss: 0.0034283148124814034 2023-01-23 23:19:16.699842: step: 312/77, loss: 0.0035809995606541634 2023-01-23 23:19:17.994275: step: 316/77, loss: 0.0005085446173325181 2023-01-23 23:19:19.343587: step: 320/77, loss: 0.0023056501522660255 2023-01-23 23:19:20.628976: step: 324/77, loss: 0.017061032354831696 2023-01-23 23:19:21.904483: step: 328/77, loss: 0.0009639563504606485 2023-01-23 23:19:23.207439: step: 332/77, loss: 0.04023989662528038 2023-01-23 23:19:24.484896: step: 336/77, loss: 1.9297449398436584e-05 2023-01-23 23:19:25.731387: step: 340/77, loss: 0.027431517839431763 2023-01-23 23:19:27.034620: step: 344/77, loss: 0.009087876416742802 2023-01-23 23:19:28.315001: step: 348/77, loss: 0.00010213730274699628 2023-01-23 23:19:29.645576: step: 352/77, loss: 0.0021237514447420835 2023-01-23 23:19:30.971232: step: 356/77, loss: 0.02595115266740322 2023-01-23 23:19:32.356272: step: 360/77, loss: 0.033148620277643204 2023-01-23 23:19:33.655082: step: 364/77, loss: 2.347496774746105e-05 2023-01-23 23:19:34.960385: step: 368/77, loss: 0.0004097193304914981 2023-01-23 23:19:36.262371: step: 372/77, loss: 0.04141872003674507 2023-01-23 23:19:37.582963: step: 376/77, loss: 0.0002173631073674187 2023-01-23 23:19:38.926737: step: 380/77, loss: 0.0032450349535793066 2023-01-23 23:19:40.253302: step: 384/77, loss: 0.0003417402331251651 2023-01-23 23:19:41.598871: step: 388/77, loss: 1.6842212062329054e-05 ================================================== Loss: 0.018 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 
13} Test Chinese: {'template': {'p': 0.9701492537313433, 'r': 0.5158730158730159, 'f1': 0.6735751295336788}, 'slot': {'p': 0.6, 'r': 0.01288659793814433, 'f1': 0.025231286795626577}, 'combined': 0.01699516727166557, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 13} Test Korean: {'template': {'p': 0.9696969696969697, 'r': 0.5079365079365079, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.01288659793814433, 'f1': 0.025231286795626577}, 'combined': 0.01682085786375105, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 13} Test Russian: {'template': {'p': 0.984375, 'r': 0.5, 'f1': 0.6631578947368421}, 'slot': {'p': 0.5925925925925926, 'r': 0.013745704467353952, 'f1': 0.026868178001679264}, 'combined': 0.017817844359008354, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 14 command: python 
train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:21:23.694660: step: 4/77, loss: 0.0002736255992203951 2023-01-23 23:21:24.979279: step: 8/77, loss: 0.0004504866083152592 2023-01-23 23:21:26.264818: step: 12/77, loss: 0.00023320181935559958 2023-01-23 23:21:27.550351: step: 16/77, loss: 0.049800168722867966 2023-01-23 23:21:28.905462: step: 20/77, loss: 0.03142474219202995 2023-01-23 23:21:30.242620: step: 24/77, loss: 0.00851297378540039 2023-01-23 23:21:31.536573: step: 28/77, loss: 0.00548419076949358 2023-01-23 23:21:32.856895: step: 32/77, loss: 5.586273800872732e-06 2023-01-23 23:21:34.129374: step: 36/77, loss: 0.0012668132549151778 2023-01-23 23:21:35.420386: step: 40/77, loss: 0.08734573423862457 2023-01-23 23:21:36.729252: step: 44/77, loss: 0.00010684480366762727 2023-01-23 23:21:38.047776: step: 48/77, loss: 0.0009850130882114172 2023-01-23 23:21:39.376919: step: 52/77, loss: 0.034376420080661774 2023-01-23 23:21:40.661371: step: 56/77, loss: 0.0007986004929989576 2023-01-23 23:21:41.969410: step: 60/77, loss: 0.003285888582468033 2023-01-23 23:21:43.271285: step: 64/77, loss: 9.500126907369122e-05 2023-01-23 23:21:44.613665: step: 68/77, loss: 0.040808241814374924 2023-01-23 23:21:45.922398: step: 72/77, loss: 0.0010058830957859755 2023-01-23 23:21:47.205234: step: 76/77, loss: 0.019482817500829697 2023-01-23 23:21:48.520275: step: 80/77, loss: 0.004581984132528305 2023-01-23 23:21:49.808531: step: 84/77, loss: 0.008562111295759678 2023-01-23 23:21:51.086990: step: 88/77, loss: 0.0005003288970328867 2023-01-23 23:21:52.429270: step: 92/77, loss: 0.013277137652039528 2023-01-23 23:21:53.685634: step: 96/77, loss: 0.0015275046462193131 2023-01-23 23:21:54.992827: step: 100/77, loss: 0.04671543464064598 2023-01-23 23:21:56.297600: step: 104/77, loss: 0.0071679409593343735 2023-01-23 23:21:57.618030: step: 108/77, loss: 0.022933460772037506 2023-01-23 23:21:58.942638: step: 112/77, loss: 0.02628178894519806 2023-01-23 23:22:00.246835: step: 116/77, loss: 0.00014312085113488138 2023-01-23 23:22:01.533004: step: 120/77, loss: 0.020610585808753967 2023-01-23 23:22:02.825821: step: 124/77, loss: 0.01356650423258543 2023-01-23 23:22:04.089110: step: 128/77, loss: 0.0012072846293449402 2023-01-23 23:22:05.406242: step: 132/77, loss: 0.025142788887023926 2023-01-23 23:22:06.692225: step: 136/77, loss: 0.055683743208646774 2023-01-23 23:22:07.991688: step: 140/77, loss: 0.0024060329888015985 2023-01-23 23:22:09.308289: step: 144/77, loss: 3.852570807794109e-05 2023-01-23 23:22:10.569827: step: 148/77, loss: 0.0007876998279243708 2023-01-23 23:22:11.891113: step: 152/77, loss: 7.95004962128587e-05 2023-01-23 23:22:13.159734: step: 156/77, loss: 0.03436954692006111 2023-01-23 23:22:14.462517: step: 160/77, loss: 0.02941078506410122 2023-01-23 23:22:15.766471: step: 164/77, loss: 0.0010085589019581676 2023-01-23 23:22:17.077208: step: 168/77, loss: 0.047466427087783813 2023-01-23 23:22:18.366171: step: 172/77, loss: 0.005795814096927643 2023-01-23 23:22:19.706400: step: 176/77, loss: 9.644380770623684e-05 2023-01-23 23:22:21.008258: step: 180/77, loss: 0.006475800182670355 2023-01-23 23:22:22.256927: step: 184/77, loss: 0.0005885373684577644 2023-01-23 23:22:23.597483: step: 188/77, loss: 0.01506776362657547 2023-01-23 23:22:24.921752: step: 192/77, loss: 0.05868987739086151 2023-01-23 23:22:26.214654: step: 196/77, 
loss: 0.019701024517416954 2023-01-23 23:22:27.571648: step: 200/77, loss: 0.00040737222298048437 2023-01-23 23:22:28.882322: step: 204/77, loss: 0.0006639646599069238 2023-01-23 23:22:30.251511: step: 208/77, loss: 0.00018083356553688645 2023-01-23 23:22:31.600333: step: 212/77, loss: 0.059999510645866394 2023-01-23 23:22:32.889027: step: 216/77, loss: 0.0008782768272794783 2023-01-23 23:22:34.160758: step: 220/77, loss: 0.020827846601605415 2023-01-23 23:22:35.469844: step: 224/77, loss: 0.002391217742115259 2023-01-23 23:22:36.812826: step: 228/77, loss: 0.0037284065037965775 2023-01-23 23:22:38.113615: step: 232/77, loss: 0.02475815825164318 2023-01-23 23:22:39.437792: step: 236/77, loss: 6.199297058628872e-05 2023-01-23 23:22:40.761083: step: 240/77, loss: 0.029774367809295654 2023-01-23 23:22:42.074668: step: 244/77, loss: 0.002180825686082244 2023-01-23 23:22:43.360145: step: 248/77, loss: 0.0032102155964821577 2023-01-23 23:22:44.648282: step: 252/77, loss: 0.0031334550585597754 2023-01-23 23:22:45.895292: step: 256/77, loss: 6.322468107100576e-05 2023-01-23 23:22:47.188490: step: 260/77, loss: 0.025384191423654556 2023-01-23 23:22:48.478363: step: 264/77, loss: 0.0002646015491336584 2023-01-23 23:22:49.765621: step: 268/77, loss: 8.781399628787767e-06 2023-01-23 23:22:51.129609: step: 272/77, loss: 0.03080436773598194 2023-01-23 23:22:52.431032: step: 276/77, loss: 0.00026560225524008274 2023-01-23 23:22:53.790137: step: 280/77, loss: 0.005289306398481131 2023-01-23 23:22:55.096630: step: 284/77, loss: 0.0034095204900950193 2023-01-23 23:22:56.390881: step: 288/77, loss: 0.004925249610096216 2023-01-23 23:22:57.710127: step: 292/77, loss: 3.067413854296319e-05 2023-01-23 23:22:59.064077: step: 296/77, loss: 3.733620178536512e-05 2023-01-23 23:23:00.323400: step: 300/77, loss: 0.00012354001228231937 2023-01-23 23:23:01.663922: step: 304/77, loss: 0.004708366468548775 2023-01-23 23:23:03.009127: step: 308/77, loss: 0.009184081107378006 2023-01-23 23:23:04.366363: step: 312/77, loss: 0.032576557248830795 2023-01-23 23:23:05.687792: step: 316/77, loss: 0.04805753007531166 2023-01-23 23:23:06.981211: step: 320/77, loss: 0.000643449486233294 2023-01-23 23:23:08.306235: step: 324/77, loss: 0.00031785358441993594 2023-01-23 23:23:09.613366: step: 328/77, loss: 0.004113791044801474 2023-01-23 23:23:10.929993: step: 332/77, loss: 0.002717088907957077 2023-01-23 23:23:12.221161: step: 336/77, loss: 0.05559927225112915 2023-01-23 23:23:13.544186: step: 340/77, loss: 0.009818648919463158 2023-01-23 23:23:14.884464: step: 344/77, loss: 0.028968963772058487 2023-01-23 23:23:16.105291: step: 348/77, loss: 0.0009838235564529896 2023-01-23 23:23:17.463171: step: 352/77, loss: 0.0022649941965937614 2023-01-23 23:23:18.739505: step: 356/77, loss: 0.00011140467540826648 2023-01-23 23:23:20.030104: step: 360/77, loss: 0.006448336876928806 2023-01-23 23:23:21.326902: step: 364/77, loss: 0.0014627741184085608 2023-01-23 23:23:22.627309: step: 368/77, loss: 0.0010857736924663186 2023-01-23 23:23:23.948944: step: 372/77, loss: 0.013371062465012074 2023-01-23 23:23:25.230783: step: 376/77, loss: 0.0003074152336921543 2023-01-23 23:23:26.558848: step: 380/77, loss: 0.0006239335052669048 2023-01-23 23:23:27.848340: step: 384/77, loss: 0.027297526597976685 2023-01-23 23:23:29.164357: step: 388/77, loss: 1.0845969882211648e-05 ================================================== Loss: 0.013 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 
0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04247787610619469, 'epoch': 14} Test Chinese: {'template': {'p': 0.9857142857142858, 'r': 0.5476190476190477, 'f1': 0.7040816326530612}, 'slot': {'p': 0.5897435897435898, 'r': 0.019759450171821305, 'f1': 0.03823773898586866}, 'combined': 0.026922489694132013, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04247787610619469, 'epoch': 14} Test Korean: {'template': {'p': 0.9857142857142858, 'r': 0.5476190476190477, 'f1': 0.7040816326530612}, 'slot': {'p': 0.5789473684210527, 'r': 0.018900343642611683, 'f1': 0.03660565723793677}, 'combined': 0.02577337091242487, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04247787610619469, 'epoch': 14} Test Russian: {'template': {'p': 0.9857142857142858, 'r': 0.5476190476190477, 'f1': 0.7040816326530612}, 'slot': {'p': 0.5945945945945946, 'r': 0.018900343642611683, 'f1': 0.0366361365528726}, 'combined': 0.025794830838247036, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': 
{'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 15 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:25:10.994541: step: 4/77, loss: 1.4878492038405966e-05 2023-01-23 23:25:12.324801: step: 8/77, loss: 4.9835511163109913e-05 2023-01-23 23:25:13.670672: step: 12/77, loss: 0.00017194103565998375 2023-01-23 23:25:14.955832: step: 16/77, loss: 0.0014435353223234415 2023-01-23 23:25:16.267507: step: 20/77, loss: 0.003427227959036827 2023-01-23 23:25:17.578604: step: 24/77, loss: 0.02076116017997265 2023-01-23 23:25:18.910428: step: 28/77, loss: 0.03030756488442421 2023-01-23 23:25:20.256269: step: 32/77, loss: 0.008024593815207481 2023-01-23 23:25:21.600923: step: 36/77, loss: 0.02782297693192959 2023-01-23 23:25:22.876312: step: 40/77, loss: 9.715352007333422e-07 2023-01-23 23:25:24.175278: step: 44/77, loss: 0.015568568371236324 2023-01-23 23:25:25.474807: step: 48/77, loss: 0.0001185145738418214 2023-01-23 23:25:26.740752: step: 52/77, loss: 0.03457537293434143 2023-01-23 23:25:27.994034: step: 56/77, loss: 0.015236059203743935 2023-01-23 23:25:29.305079: step: 60/77, loss: 0.0051247659139335155 2023-01-23 23:25:30.617640: step: 64/77, loss: 0.03789728879928589 2023-01-23 23:25:31.877559: step: 68/77, loss: 0.03683306649327278 2023-01-23 23:25:33.224780: step: 72/77, loss: 0.00116816780064255 2023-01-23 23:25:34.509456: step: 76/77, loss: 0.016729649156332016 2023-01-23 23:25:35.863511: step: 80/77, loss: 1.3653680071001872e-05 2023-01-23 23:25:37.173211: step: 84/77, loss: 0.006376555189490318 2023-01-23 23:25:38.546744: step: 88/77, loss: 0.0009691191953606904 2023-01-23 23:25:39.858621: step: 92/77, loss: 0.0029876306653022766 2023-01-23 23:25:41.174101: step: 96/77, loss: 0.042058952152729034 2023-01-23 23:25:42.500331: step: 100/77, loss: 8.360248466487974e-05 2023-01-23 23:25:43.821013: step: 104/77, loss: 0.0072425431571900845 2023-01-23 23:25:45.087255: step: 108/77, loss: 0.003880678676068783 2023-01-23 23:25:46.363479: step: 112/77, loss: 0.0008597972337156534 2023-01-23 23:25:47.701606: step: 116/77, loss: 0.0099770687520504 2023-01-23 23:25:49.001109: step: 120/77, loss: 0.010671457275748253 2023-01-23 23:25:50.327141: step: 124/77, loss: 0.00871829129755497 2023-01-23 23:25:51.640823: step: 128/77, loss: 3.0166500437189825e-05 2023-01-23 23:25:52.979462: step: 132/77, loss: 0.00016145638073794544 2023-01-23 23:25:54.305342: step: 136/77, loss: 3.830725290754344e-06 2023-01-23 23:25:55.640454: step: 140/77, loss: 0.0043679047375917435 2023-01-23 23:25:56.921605: step: 144/77, loss: 0.00938387494534254 2023-01-23 23:25:58.264460: step: 148/77, loss: 0.0004537670756690204 2023-01-23 23:25:59.580846: step: 152/77, loss: 7.381376235571224e-06 2023-01-23 23:26:00.892935: step: 156/77, loss: 0.0018163879867643118 2023-01-23 23:26:02.200150: step: 160/77, loss: 0.012446406297385693 2023-01-23 23:26:03.545470: step: 164/77, loss: 0.0003807510656770319 2023-01-23 23:26:04.825631: step: 168/77, loss: 0.005677470006048679 2023-01-23 23:26:06.104247: step: 172/77, loss: 0.0012833502842113376 2023-01-23 23:26:07.400540: step: 176/77, loss: 0.04827876389026642 2023-01-23 23:26:08.675069: step: 180/77, loss: 0.029316775500774384 2023-01-23 23:26:09.931026: step: 184/77, loss: 9.630203749111388e-06 2023-01-23 23:26:11.231467: step: 188/77, loss: 
1.1573029951250646e-05 2023-01-23 23:26:12.596477: step: 192/77, loss: 0.0010241689160466194 2023-01-23 23:26:13.906927: step: 196/77, loss: 0.00017882336396723986 2023-01-23 23:26:15.241149: step: 200/77, loss: 0.004142004065215588 2023-01-23 23:26:16.602271: step: 204/77, loss: 7.169664604589343e-05 2023-01-23 23:26:17.878826: step: 208/77, loss: 0.0007131965248845518 2023-01-23 23:26:19.179060: step: 212/77, loss: 0.002486684825271368 2023-01-23 23:26:20.479574: step: 216/77, loss: 0.032599691301584244 2023-01-23 23:26:21.819189: step: 220/77, loss: 0.0008860656525939703 2023-01-23 23:26:23.203385: step: 224/77, loss: 0.11878086626529694 2023-01-23 23:26:24.558094: step: 228/77, loss: 0.0019146227277815342 2023-01-23 23:26:25.877109: step: 232/77, loss: 0.00997019000351429 2023-01-23 23:26:27.132735: step: 236/77, loss: 0.0006213907618075609 2023-01-23 23:26:28.443989: step: 240/77, loss: 7.765216287225485e-05 2023-01-23 23:26:29.790003: step: 244/77, loss: 0.011932688765227795 2023-01-23 23:26:31.133249: step: 248/77, loss: 1.8564012862043455e-05 2023-01-23 23:26:32.464650: step: 252/77, loss: 0.020145785063505173 2023-01-23 23:26:33.757886: step: 256/77, loss: 0.008753478527069092 2023-01-23 23:26:35.098844: step: 260/77, loss: 0.0072768256068229675 2023-01-23 23:26:36.437499: step: 264/77, loss: 0.03789191693067551 2023-01-23 23:26:37.804198: step: 268/77, loss: 0.0030572679825127125 2023-01-23 23:26:39.124232: step: 272/77, loss: 0.00020431546727195382 2023-01-23 23:26:40.475208: step: 276/77, loss: 0.0017906144494190812 2023-01-23 23:26:41.767963: step: 280/77, loss: 5.944917575106956e-05 2023-01-23 23:26:43.104208: step: 284/77, loss: 0.028044767677783966 2023-01-23 23:26:44.433343: step: 288/77, loss: 0.0019235184881836176 2023-01-23 23:26:45.715454: step: 292/77, loss: 1.2465928193705622e-05 2023-01-23 23:26:46.972036: step: 296/77, loss: 0.005761981941759586 2023-01-23 23:26:48.299528: step: 300/77, loss: 0.00087132235057652 2023-01-23 23:26:49.589327: step: 304/77, loss: 0.007014409638941288 2023-01-23 23:26:50.914590: step: 308/77, loss: 0.0032073655165731907 2023-01-23 23:26:52.214402: step: 312/77, loss: 0.03255422040820122 2023-01-23 23:26:53.500980: step: 316/77, loss: 0.06079493835568428 2023-01-23 23:26:54.805438: step: 320/77, loss: 0.0003763463464565575 2023-01-23 23:26:56.129244: step: 324/77, loss: 0.032440174371004105 2023-01-23 23:26:57.452099: step: 328/77, loss: 0.0023321935441344976 2023-01-23 23:26:58.777247: step: 332/77, loss: 5.185348345548846e-05 2023-01-23 23:27:00.092676: step: 336/77, loss: 8.398528734687716e-05 2023-01-23 23:27:01.410779: step: 340/77, loss: 0.033188119530677795 2023-01-23 23:27:02.778141: step: 344/77, loss: 0.0655626654624939 2023-01-23 23:27:04.073432: step: 348/77, loss: 0.08891092240810394 2023-01-23 23:27:05.370656: step: 352/77, loss: 0.008576600812375546 2023-01-23 23:27:06.629191: step: 356/77, loss: 0.0025107869878411293 2023-01-23 23:27:07.931013: step: 360/77, loss: 0.00036133453249931335 2023-01-23 23:27:09.248105: step: 364/77, loss: 0.018196951597929 2023-01-23 23:27:10.528301: step: 368/77, loss: 0.009679058566689491 2023-01-23 23:27:11.842007: step: 372/77, loss: 0.002200314775109291 2023-01-23 23:27:13.197960: step: 376/77, loss: 0.010349934920668602 2023-01-23 23:27:14.575988: step: 380/77, loss: 5.733857324230485e-05 2023-01-23 23:27:15.907806: step: 384/77, loss: 0.0001564957929076627 2023-01-23 23:27:17.206254: step: 388/77, loss: 0.0032057648058980703 ================================================== Loss: 0.012 
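The "Loss: 0.012" figure closing the epoch is presumably an aggregate (most likely the mean) of the per-step losses printed above it; a one-line reduction under that assumption, not confirmed by the log itself:

def epoch_loss(step_losses):
    # Mean of the per-step losses, assumed to be what the "Loss: ..." summary reports.
    return sum(step_losses) / len(step_losses)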
-------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 15} Test Chinese: {'template': {'p': 0.9705882352941176, 'r': 0.5238095238095238, 'f1': 0.6804123711340205}, 'slot': {'p': 0.5666666666666667, 'r': 0.014604810996563574, 'f1': 0.028475711892797323}, 'combined': 0.019375226648707455, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 15} Test Korean: {'template': {'p': 0.9705882352941176, 'r': 0.5238095238095238, 'f1': 0.6804123711340205}, 'slot': {'p': 0.6, 'r': 0.015463917525773196, 'f1': 0.03015075376884422}, 'combined': 0.0205149458633373, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 15} Test Russian: {'template': {'p': 0.9705882352941176, 'r': 0.5238095238095238, 'f1': 0.6804123711340205}, 'slot': {'p': 0.5862068965517241, 'r': 0.014604810996563574, 'f1': 0.02849958088851635}, 'combined': 0.019391467408681227, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 
'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 16 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:28:59.834908: step: 4/77, loss: 0.0007765126065351069 2023-01-23 23:29:01.129311: step: 8/77, loss: 0.00012015578977297992 2023-01-23 23:29:02.455201: step: 12/77, loss: 0.008260026574134827 2023-01-23 23:29:03.777294: step: 16/77, loss: 0.00024017225950956345 2023-01-23 23:29:05.071207: step: 20/77, loss: 0.003737919731065631 2023-01-23 23:29:06.345028: step: 24/77, loss: 0.004641602281481028 2023-01-23 23:29:07.661282: step: 28/77, loss: 0.0012632752768695354 2023-01-23 23:29:08.987567: step: 32/77, loss: 0.0055075702257454395 2023-01-23 23:29:10.296417: step: 36/77, loss: 0.030879627913236618 2023-01-23 23:29:11.596803: step: 40/77, loss: 0.004773629363626242 2023-01-23 23:29:12.889368: step: 44/77, loss: 0.02421536110341549 2023-01-23 23:29:14.181186: step: 48/77, loss: 0.0026690459344536066 2023-01-23 23:29:15.450747: step: 52/77, loss: 0.001229959074407816 2023-01-23 23:29:16.733577: step: 56/77, loss: 7.954640022944659e-05 2023-01-23 23:29:18.053967: step: 60/77, loss: 0.00868784636259079 2023-01-23 23:29:19.370421: step: 64/77, loss: 0.003013339824974537 2023-01-23 23:29:20.676153: step: 68/77, loss: 0.00032769900280982256 2023-01-23 23:29:21.959810: step: 72/77, loss: 0.0007191248587332666 2023-01-23 23:29:23.265900: step: 76/77, loss: 7.872861169744283e-05 2023-01-23 23:29:24.566552: step: 80/77, loss: 0.020206786692142487 2023-01-23 23:29:25.855711: step: 84/77, loss: 0.016133133322000504 2023-01-23 23:29:27.157079: step: 88/77, loss: 0.00040447746869176626 2023-01-23 23:29:28.494791: step: 92/77, loss: 0.02237563207745552 2023-01-23 23:29:29.769253: step: 96/77, loss: 0.002664468716830015 2023-01-23 23:29:31.086240: step: 100/77, loss: 0.0001514804025646299 2023-01-23 23:29:32.382197: step: 104/77, loss: 6.90346205374226e-05 2023-01-23 23:29:33.705265: step: 108/77, loss: 0.0008093639044091105 2023-01-23 23:29:34.977523: step: 112/77, loss: 0.000921435363125056 2023-01-23 23:29:36.350445: step: 116/77, loss: 0.006500033661723137 2023-01-23 23:29:37.672364: step: 120/77, loss: 4.7151137550827116e-05 2023-01-23 23:29:39.011272: step: 124/77, loss: 0.0004083859676029533 2023-01-23 23:29:40.305068: step: 128/77, loss: 0.07341621071100235 2023-01-23 23:29:41.655150: step: 132/77, loss: 6.460564327426255e-05 2023-01-23 23:29:42.929188: step: 136/77, loss: 0.008727012202143669 2023-01-23 23:29:44.177856: step: 140/77, loss: 0.03657951205968857 2023-01-23 23:29:45.458945: step: 144/77, loss: 0.00010166480933548883 2023-01-23 23:29:46.774153: step: 148/77, loss: 6.464680154749658e-06 2023-01-23 23:29:48.065316: step: 152/77, loss: 0.00010319374996470287 2023-01-23 23:29:49.334231: step: 156/77, loss: 0.03454245626926422 2023-01-23 23:29:50.650494: step: 160/77, loss: 0.005117420572787523 2023-01-23 23:29:51.948596: step: 164/77, loss: 0.006901135668158531 2023-01-23 23:29:53.243243: step: 168/77, loss: 0.000240602734265849 2023-01-23 23:29:54.570764: step: 172/77, loss: 0.1016627624630928 2023-01-23 23:29:55.869262: step: 176/77, loss: 
0.00012070147931808606 2023-01-23 23:29:57.184419: step: 180/77, loss: 4.286599505576305e-05 2023-01-23 23:29:58.447523: step: 184/77, loss: 0.00014440326776821166 2023-01-23 23:29:59.742420: step: 188/77, loss: 0.0006260947557166219 2023-01-23 23:30:01.063367: step: 192/77, loss: 0.0010012636194005609 2023-01-23 23:30:02.396854: step: 196/77, loss: 0.002178141148760915 2023-01-23 23:30:03.680616: step: 200/77, loss: 0.05275914445519447 2023-01-23 23:30:05.030115: step: 204/77, loss: 0.03453891724348068 2023-01-23 23:30:06.326553: step: 208/77, loss: 0.001776168355718255 2023-01-23 23:30:07.675290: step: 212/77, loss: 2.2828473447589204e-05 2023-01-23 23:30:08.977730: step: 216/77, loss: 0.06948499381542206 2023-01-23 23:30:10.270470: step: 220/77, loss: 0.015457428991794586 2023-01-23 23:30:11.558556: step: 224/77, loss: 0.026023317128419876 2023-01-23 23:30:12.865474: step: 228/77, loss: 0.042440950870513916 2023-01-23 23:30:14.180582: step: 232/77, loss: 8.644620538689196e-05 2023-01-23 23:30:15.541220: step: 236/77, loss: 0.019920025020837784 2023-01-23 23:30:16.909258: step: 240/77, loss: 0.0416274294257164 2023-01-23 23:30:18.216590: step: 244/77, loss: 3.288514926680364e-05 2023-01-23 23:30:19.575385: step: 248/77, loss: 0.0008118004188872874 2023-01-23 23:30:20.848456: step: 252/77, loss: 0.0020588578190654516 2023-01-23 23:30:22.167997: step: 256/77, loss: 0.0016918214969336987 2023-01-23 23:30:23.463113: step: 260/77, loss: 0.004299009684473276 2023-01-23 23:30:24.741966: step: 264/77, loss: 0.0018904039170593023 2023-01-23 23:30:26.065378: step: 268/77, loss: 0.003573576221242547 2023-01-23 23:30:27.356018: step: 272/77, loss: 0.0013117672642692924 2023-01-23 23:30:28.658705: step: 276/77, loss: 0.0018177537713199854 2023-01-23 23:30:29.998885: step: 280/77, loss: 0.0013126140693202615 2023-01-23 23:30:31.321732: step: 284/77, loss: 0.003976478241384029 2023-01-23 23:30:32.662342: step: 288/77, loss: 0.01663769781589508 2023-01-23 23:30:33.954546: step: 292/77, loss: 0.10017237067222595 2023-01-23 23:30:35.248193: step: 296/77, loss: 0.0033373809419572353 2023-01-23 23:30:36.545454: step: 300/77, loss: 0.002248018980026245 2023-01-23 23:30:37.865563: step: 304/77, loss: 0.10106675326824188 2023-01-23 23:30:39.143169: step: 308/77, loss: 0.0033508699852973223 2023-01-23 23:30:40.478493: step: 312/77, loss: 0.0002143883320968598 2023-01-23 23:30:41.771073: step: 316/77, loss: 0.020443376153707504 2023-01-23 23:30:43.077266: step: 320/77, loss: 0.00360716856084764 2023-01-23 23:30:44.363884: step: 324/77, loss: 0.0013300712453201413 2023-01-23 23:30:45.663256: step: 328/77, loss: 0.01074330136179924 2023-01-23 23:30:47.000705: step: 332/77, loss: 0.0008238847367465496 2023-01-23 23:30:48.298647: step: 336/77, loss: 0.005398381967097521 2023-01-23 23:30:49.579105: step: 340/77, loss: 2.7465936000226066e-05 2023-01-23 23:30:50.805950: step: 344/77, loss: 0.01161793153733015 2023-01-23 23:30:52.109789: step: 348/77, loss: 0.005161845590919256 2023-01-23 23:30:53.428578: step: 352/77, loss: 7.165823626564816e-05 2023-01-23 23:30:54.782493: step: 356/77, loss: 1.2400157174852211e-05 2023-01-23 23:30:56.115336: step: 360/77, loss: 0.0018668932607397437 2023-01-23 23:30:57.469857: step: 364/77, loss: 0.017863446846604347 2023-01-23 23:30:58.848787: step: 368/77, loss: 0.03986204415559769 2023-01-23 23:31:00.200714: step: 372/77, loss: 0.05790456384420395 2023-01-23 23:31:01.560869: step: 376/77, loss: 0.011296149343252182 2023-01-23 23:31:02.893838: step: 380/77, loss: 0.08306736499071121 
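In the evaluation dictionaries above, each 'f1' is the usual harmonic mean of 'p' and 'r', and the 'combined' figure matches the product of the template f1 and the slot f1 (e.g. 0.7368421 × 0.0702988 ≈ 0.0517991 for the epoch-12 Dev scores). A standalone sanity check on the logged numbers, not code taken from train.py:

def f1(p, r):
    # Standard harmonic mean of precision and recall; defined as 0 when both are 0.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

# Epoch-12 Dev Chinese figures copied verbatim from the log above.
template = {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}
slot = {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}

assert abs(f1(template['p'], template['r']) - template['f1']) < 1e-9
assert abs(f1(slot['p'], slot['r']) - slot['f1']) < 1e-9
assert abs(template['f1'] * slot['f1'] - 0.05179909351586346) < 1e-9  # the 'combined' value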
2023-01-23 23:31:04.176120: step: 384/77, loss: 0.0009478795109316707 2023-01-23 23:31:05.435349: step: 388/77, loss: 0.00278334878385067 ================================================== Loss: 0.013 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Chinese: {'template': {'p': 0.9583333333333334, 'r': 0.5476190476190477, 'f1': 0.696969696969697}, 'slot': {'p': 0.6341463414634146, 'r': 0.022336769759450172, 'f1': 0.043153526970954356}, 'combined': 0.030076700616119705, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Korean: {'template': {'p': 0.9583333333333334, 'r': 0.5476190476190477, 'f1': 0.696969696969697}, 'slot': {'p': 0.625, 'r': 0.02147766323024055, 'f1': 0.04152823920265781}, 'combined': 0.028943924292761505, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Russian: {'template': {'p': 0.9583333333333334, 'r': 0.5476190476190477, 'f1': 0.696969696969697}, 'slot': {'p': 0.6341463414634146, 'r': 0.022336769759450172, 'f1': 0.043153526970954356}, 'combined': 0.030076700616119705, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 17 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:32:47.429403: step: 4/77, loss: 0.002295812126249075 2023-01-23 23:32:48.679202: step: 8/77, loss: 0.011710532009601593 2023-01-23 23:32:49.989895: step: 12/77, loss: 2.3140978555602487e-06 2023-01-23 23:32:51.298045: step: 16/77, loss: 2.1024964098614873e-06 2023-01-23 23:32:52.613140: step: 20/77, loss: 0.0002727890096139163 2023-01-23 23:32:53.919098: step: 24/77, loss: 6.954662239877507e-05 2023-01-23 23:32:55.266011: step: 28/77, loss: 0.0008974984521046281 2023-01-23 23:32:56.559614: step: 32/77, loss: 2.525923446228262e-05 2023-01-23 23:32:57.883021: step: 36/77, loss: 0.042360275983810425 2023-01-23 23:32:59.212084: step: 40/77, loss: 0.00043919257586821914 2023-01-23 23:33:00.484583: step: 44/77, loss: 0.001692838268354535 2023-01-23 23:33:01.791329: step: 48/77, loss: 0.00035355580621398985 2023-01-23 23:33:03.098217: step: 52/77, loss: 0.0008009643061086535 2023-01-23 23:33:04.413103: step: 56/77, loss: 0.0065523674711585045 2023-01-23 23:33:05.730098: step: 60/77, loss: 0.01772763580083847 2023-01-23 23:33:07.013891: step: 64/77, loss: 0.001366381999105215 2023-01-23 23:33:08.380919: step: 68/77, loss: 0.07654806971549988 2023-01-23 23:33:09.698811: step: 72/77, loss: 1.4059327440918423e-05 2023-01-23 23:33:11.032432: step: 76/77, loss: 0.016522567719221115 2023-01-23 23:33:12.342181: step: 80/77, loss: 0.008804031647741795 2023-01-23 23:33:13.694739: step: 84/77, loss: 0.009108972735702991 2023-01-23 23:33:15.087069: step: 88/77, loss: 0.0046868640929460526 2023-01-23 23:33:16.470343: step: 92/77, loss: 0.087021104991436 2023-01-23 23:33:17.759573: step: 96/77, loss: 3.298327646916732e-05 2023-01-23 23:33:19.103649: step: 100/77, loss: 0.014686529524624348 2023-01-23 23:33:20.371735: step: 104/77, loss: 7.930253559607081e-06 2023-01-23 23:33:21.651132: step: 108/77, loss: 0.011731461621820927 2023-01-23 23:33:22.940398: step: 112/77, loss: 0.0001062441078829579 2023-01-23 23:33:24.313900: step: 116/77, loss: 0.024497399106621742 2023-01-23 23:33:25.689797: step: 120/77, loss: 0.01568659022450447 2023-01-23 23:33:26.989946: step: 124/77, loss: 0.04065697267651558 2023-01-23 23:33:28.305982: step: 128/77, loss: 0.004619527142494917 2023-01-23 23:33:29.632612: step: 132/77, loss: 2.9057164852019923e-07 2023-01-23 23:33:30.904505: step: 136/77, loss: 2.6861227524932474e-05 2023-01-23 23:33:32.224199: step: 140/77, loss: 0.00025511058629490435 2023-01-23 23:33:33.536471: step: 144/77, loss: 0.017772279679775238 2023-01-23 23:33:34.834354: step: 148/77, loss: 0.0006557226879522204 2023-01-23 23:33:36.146228: step: 152/77, loss: 0.010482666082680225 2023-01-23 23:33:37.450144: step: 156/77, loss: 0.13506212830543518 2023-01-23 23:33:38.762205: step: 160/77, loss: 0.19042086601257324 2023-01-23 23:33:40.049963: step: 164/77, loss: 
0.0016938840271905065 2023-01-23 23:33:41.381881: step: 168/77, loss: 0.002290728036314249 2023-01-23 23:33:42.712517: step: 172/77, loss: 0.0020732858683913946 2023-01-23 23:33:43.989185: step: 176/77, loss: 0.0001949071593116969 2023-01-23 23:33:45.279279: step: 180/77, loss: 0.00787076260894537 2023-01-23 23:33:46.629482: step: 184/77, loss: 5.562337719311472e-06 2023-01-23 23:33:47.953830: step: 188/77, loss: 4.0657974750502035e-05 2023-01-23 23:33:49.259466: step: 192/77, loss: 0.06441272795200348 2023-01-23 23:33:50.555158: step: 196/77, loss: 0.009098620153963566 2023-01-23 23:33:51.845405: step: 200/77, loss: 0.002766120946034789 2023-01-23 23:33:53.192276: step: 204/77, loss: 0.029924781993031502 2023-01-23 23:33:54.519373: step: 208/77, loss: 0.000314296135911718 2023-01-23 23:33:55.861702: step: 212/77, loss: 0.006736287847161293 2023-01-23 23:33:57.116881: step: 216/77, loss: 0.02019321545958519 2023-01-23 23:33:58.426450: step: 220/77, loss: 0.005796336568892002 2023-01-23 23:33:59.737835: step: 224/77, loss: 0.0067178416065871716 2023-01-23 23:34:01.085896: step: 228/77, loss: 0.00044596640509553254 2023-01-23 23:34:02.419457: step: 232/77, loss: 0.028060777112841606 2023-01-23 23:34:03.677803: step: 236/77, loss: 0.00021894060773774981 2023-01-23 23:34:05.012558: step: 240/77, loss: 1.1538486432982609e-05 2023-01-23 23:34:06.320308: step: 244/77, loss: 0.01234140433371067 2023-01-23 23:34:07.542614: step: 248/77, loss: 0.0018924312898889184 2023-01-23 23:34:08.865314: step: 252/77, loss: 0.048851627856492996 2023-01-23 23:34:10.159741: step: 256/77, loss: 0.0031984003726392984 2023-01-23 23:34:11.401178: step: 260/77, loss: 0.008823526091873646 2023-01-23 23:34:12.720241: step: 264/77, loss: 0.016866758465766907 2023-01-23 23:34:14.080534: step: 268/77, loss: 0.014881649054586887 2023-01-23 23:34:15.391987: step: 272/77, loss: 9.640722419135273e-05 2023-01-23 23:34:16.702762: step: 276/77, loss: 0.05817005783319473 2023-01-23 23:34:18.012560: step: 280/77, loss: 0.01585889607667923 2023-01-23 23:34:19.300395: step: 284/77, loss: 0.015098603442311287 2023-01-23 23:34:20.633905: step: 288/77, loss: 0.054936982691287994 2023-01-23 23:34:21.958624: step: 292/77, loss: 0.11156722158193588 2023-01-23 23:34:23.255337: step: 296/77, loss: 0.013453776948153973 2023-01-23 23:34:24.527421: step: 300/77, loss: 0.003620242001488805 2023-01-23 23:34:25.782798: step: 304/77, loss: 0.004386755637824535 2023-01-23 23:34:27.100778: step: 308/77, loss: 9.987157682189718e-05 2023-01-23 23:34:28.421037: step: 312/77, loss: 0.019409600645303726 2023-01-23 23:34:29.738685: step: 316/77, loss: 0.003329006489366293 2023-01-23 23:34:31.059914: step: 320/77, loss: 0.00030134027474559844 2023-01-23 23:34:32.381504: step: 324/77, loss: 1.3888611647416838e-05 2023-01-23 23:34:33.683015: step: 328/77, loss: 0.04591492563486099 2023-01-23 23:34:34.949070: step: 332/77, loss: 0.0008835216867737472 2023-01-23 23:34:36.251519: step: 336/77, loss: 0.0014211467932909727 2023-01-23 23:34:37.556457: step: 340/77, loss: 0.005028215236961842 2023-01-23 23:34:38.870972: step: 344/77, loss: 0.0277964249253273 2023-01-23 23:34:40.203339: step: 348/77, loss: 0.002034218981862068 2023-01-23 23:34:41.486128: step: 352/77, loss: 2.3846632757340558e-05 2023-01-23 23:34:42.839397: step: 356/77, loss: 0.00022707131574861705 2023-01-23 23:34:44.182154: step: 360/77, loss: 0.0015117726288735867 2023-01-23 23:34:45.478561: step: 364/77, loss: 0.006705767475068569 2023-01-23 23:34:46.806088: step: 368/77, loss: 
0.0004414547875057906 2023-01-23 23:34:48.121669: step: 372/77, loss: 3.6705201637232676e-05 2023-01-23 23:34:49.406031: step: 376/77, loss: 0.002922557760030031 2023-01-23 23:34:50.747209: step: 380/77, loss: 0.0046601551584899426 2023-01-23 23:34:52.114684: step: 384/77, loss: 0.022119011729955673 2023-01-23 23:34:53.433318: step: 388/77, loss: 0.030487284064292908 ================================================== Loss: 0.016 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04988944951527864, 'epoch': 17} Test Chinese: {'template': {'p': 0.9577464788732394, 'r': 0.5396825396825397, 'f1': 0.6903553299492384}, 'slot': {'p': 0.5806451612903226, 'r': 0.015463917525773196, 'f1': 0.030125523012552297}, 'combined': 0.020797315379223916, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04988944951527864, 'epoch': 17} Test Korean: {'template': {'p': 0.9577464788732394, 'r': 0.5396825396825397, 'f1': 0.6903553299492384}, 'slot': {'p': 0.5588235294117647, 'r': 0.01632302405498282, 'f1': 0.03171953255425709}, 'combined': 0.021897748362329765, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04988944951527864, 'epoch': 17} Test Russian: {'template': {'p': 0.9583333333333334, 'r': 0.5476190476190477, 'f1': 0.696969696969697}, 'slot': {'p': 0.5806451612903226, 'r': 0.015463917525773196, 'f1': 0.030125523012552297}, 'combined': 0.02099657664511221, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': 
{'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 18 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:36:35.488881: step: 4/77, loss: 0.00043394678505137563 2023-01-23 23:36:36.792883: step: 8/77, loss: 0.024783240631222725 2023-01-23 23:36:38.075383: step: 12/77, loss: 0.030805286020040512 2023-01-23 23:36:39.353236: step: 16/77, loss: 0.0003471440286375582 2023-01-23 23:36:40.670228: step: 20/77, loss: 0.014186517335474491 2023-01-23 23:36:41.932759: step: 24/77, loss: 2.8209165975567885e-05 2023-01-23 23:36:43.228506: step: 28/77, loss: 0.017384840175509453 2023-01-23 23:36:44.538315: step: 32/77, loss: 0.013848107308149338 2023-01-23 23:36:45.827649: step: 36/77, loss: 0.018242739140987396 2023-01-23 23:36:47.181225: step: 40/77, loss: 0.017047669738531113 2023-01-23 23:36:48.514445: step: 44/77, loss: 2.3446407794835977e-05 2023-01-23 23:36:49.833073: step: 48/77, loss: 0.0007906182436272502 2023-01-23 23:36:51.174300: step: 52/77, loss: 4.537371569313109e-05 2023-01-23 23:36:52.475282: step: 56/77, loss: 0.0015261220978572965 2023-01-23 23:36:53.789100: step: 60/77, loss: 0.03159713000059128 2023-01-23 23:36:55.109709: step: 64/77, loss: 0.0020535080693662167 2023-01-23 23:36:56.445699: step: 68/77, loss: 0.02969714067876339 2023-01-23 23:36:57.725562: step: 72/77, loss: 0.000775107997469604 2023-01-23 23:36:59.059539: step: 76/77, loss: 0.005453579593449831 2023-01-23 23:37:00.386545: step: 80/77, loss: 0.007755403406918049 2023-01-23 23:37:01.650596: step: 84/77, loss: 0.00011459521192591637 2023-01-23 23:37:02.991462: step: 88/77, loss: 9.09103164303815e-06 2023-01-23 23:37:04.258543: step: 92/77, loss: 0.0023775151930749416 2023-01-23 23:37:05.564130: step: 96/77, loss: 0.023311413824558258 2023-01-23 23:37:06.896894: step: 100/77, loss: 0.0037164664827287197 2023-01-23 23:37:08.241641: step: 104/77, loss: 0.04141361266374588 2023-01-23 23:37:09.616562: step: 108/77, loss: 0.015981419011950493 2023-01-23 23:37:10.916837: step: 112/77, loss: 4.68757571070455e-06 2023-01-23 23:37:12.208722: step: 116/77, loss: 2.9949942472740076e-06 2023-01-23 23:37:13.519471: step: 120/77, loss: 9.493254765402526e-05 2023-01-23 23:37:14.818546: step: 124/77, loss: 0.018639886751770973 2023-01-23 23:37:16.055321: step: 128/77, loss: 0.05404244363307953 2023-01-23 23:37:17.332832: step: 132/77, loss: 1.4669490155938547e-05 2023-01-23 23:37:18.617556: step: 136/77, loss: 0.008017289452254772 2023-01-23 23:37:19.928145: step: 140/77, loss: 0.0006571430712938309 2023-01-23 23:37:21.252353: step: 144/77, loss: 0.0001994997583096847 2023-01-23 23:37:22.556900: step: 148/77, loss: 0.0187743678689003 2023-01-23 23:37:23.827562: step: 152/77, loss: 0.0399123877286911 
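The "Current best result" blocks have stayed pinned to epoch 2 through epochs 12-18, even when a later epoch ties the Dev 'combined' score exactly (epochs 12 and 16 both reach 0.05179909351586346), which is consistent with the script replacing its best checkpoint only on a strict improvement of that score. A hypothetical sketch of that selection rule, not taken from train.py:

def update_best(best, dev_combined, epoch):
    # Replace the stored best only on a strict improvement of the Dev 'combined' score,
    # so a later epoch that merely ties it (e.g. epoch 16 vs. epoch 2) leaves the
    # earlier epoch in place.
    if best is None or dev_combined > best['combined']:
        return {'combined': dev_combined, 'epoch': epoch}
    return best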
2023-01-23 23:37:25.109922: step: 156/77, loss: 0.005981434136629105 2023-01-23 23:37:26.400216: step: 160/77, loss: 0.001792241120710969 2023-01-23 23:37:27.719006: step: 164/77, loss: 0.0022695541847497225 2023-01-23 23:37:29.015216: step: 168/77, loss: 0.00027215006412006915 2023-01-23 23:37:30.332647: step: 172/77, loss: 0.0007794547127559781 2023-01-23 23:37:31.624854: step: 176/77, loss: 5.349092680262402e-05 2023-01-23 23:37:32.959772: step: 180/77, loss: 0.00024154865241143852 2023-01-23 23:37:34.352063: step: 184/77, loss: 0.042345404624938965 2023-01-23 23:37:35.676413: step: 188/77, loss: 0.0012398757971823215 2023-01-23 23:37:36.985841: step: 192/77, loss: 0.022443875670433044 2023-01-23 23:37:38.326521: step: 196/77, loss: 2.1823982024216093e-05 2023-01-23 23:37:39.692661: step: 200/77, loss: 1.095620700652944e-05 2023-01-23 23:37:40.977338: step: 204/77, loss: 0.0005885653663426638 2023-01-23 23:37:42.309858: step: 208/77, loss: 0.00010669405310181901 2023-01-23 23:37:43.588364: step: 212/77, loss: 1.8998762243427336e-06 2023-01-23 23:37:44.861282: step: 216/77, loss: 0.021896017715334892 2023-01-23 23:37:46.169382: step: 220/77, loss: 0.00041468662675470114 2023-01-23 23:37:47.488248: step: 224/77, loss: 0.02845904417335987 2023-01-23 23:37:48.739721: step: 228/77, loss: 1.4626778465753887e-05 2023-01-23 23:37:50.061202: step: 232/77, loss: 1.9547209376469254e-05 2023-01-23 23:37:51.344351: step: 236/77, loss: 0.003462222870439291 2023-01-23 23:37:52.656428: step: 240/77, loss: 0.00018325488781556487 2023-01-23 23:37:53.995821: step: 244/77, loss: 0.000745791126973927 2023-01-23 23:37:55.352783: step: 248/77, loss: 5.230200486039394e-07 2023-01-23 23:37:56.645663: step: 252/77, loss: 0.00013812491670250893 2023-01-23 23:37:57.947510: step: 256/77, loss: 9.543041232973337e-05 2023-01-23 23:37:59.256688: step: 260/77, loss: 2.5750792701728642e-05 2023-01-23 23:38:00.558394: step: 264/77, loss: 7.58459500502795e-05 2023-01-23 23:38:01.808218: step: 268/77, loss: 1.9997112303826725e-06 2023-01-23 23:38:03.070523: step: 272/77, loss: 3.58948955181404e-06 2023-01-23 23:38:04.356189: step: 276/77, loss: 7.18016917744535e-06 2023-01-23 23:38:05.652971: step: 280/77, loss: 0.0010476586176082492 2023-01-23 23:38:06.992217: step: 284/77, loss: 0.08216924965381622 2023-01-23 23:38:08.285562: step: 288/77, loss: 0.0045933956280350685 2023-01-23 23:38:09.618933: step: 292/77, loss: 0.02022452838718891 2023-01-23 23:38:10.982072: step: 296/77, loss: 3.2676407499820925e-06 2023-01-23 23:38:12.188998: step: 300/77, loss: 8.142985461745411e-05 2023-01-23 23:38:13.528359: step: 304/77, loss: 6.297406798694283e-05 2023-01-23 23:38:14.852356: step: 308/77, loss: 4.044241723022424e-05 2023-01-23 23:38:16.145336: step: 312/77, loss: 0.009998363442718983 2023-01-23 23:38:17.453027: step: 316/77, loss: 0.0004960843361914158 2023-01-23 23:38:18.793932: step: 320/77, loss: 2.751972488113097e-06 2023-01-23 23:38:20.119547: step: 324/77, loss: 0.00029293610714375973 2023-01-23 23:38:21.406696: step: 328/77, loss: 0.021552830934524536 2023-01-23 23:38:22.772026: step: 332/77, loss: 0.021268876269459724 2023-01-23 23:38:24.070561: step: 336/77, loss: 0.0005264312494546175 2023-01-23 23:38:25.405669: step: 340/77, loss: 9.673903150542174e-06 2023-01-23 23:38:26.718285: step: 344/77, loss: 0.020955218002200127 2023-01-23 23:38:28.042697: step: 348/77, loss: 3.5274133551865816e-05 2023-01-23 23:38:29.314776: step: 352/77, loss: 1.1457627806521486e-05 2023-01-23 23:38:30.565691: step: 356/77, loss: 
0.0015478396089747548 2023-01-23 23:38:31.878458: step: 360/77, loss: 9.202599903801456e-05 2023-01-23 23:38:33.213322: step: 364/77, loss: 0.0002595757250674069 2023-01-23 23:38:34.507244: step: 368/77, loss: 0.00846023764461279 2023-01-23 23:38:35.838513: step: 372/77, loss: 0.0002929982729256153 2023-01-23 23:38:37.102702: step: 376/77, loss: 0.004742179997265339 2023-01-23 23:38:38.412982: step: 380/77, loss: 0.00033104015165008605 2023-01-23 23:38:39.730518: step: 384/77, loss: 0.003096122294664383 2023-01-23 23:38:41.030311: step: 388/77, loss: 4.091663868166506e-06 ================================================== Loss: 0.008 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5166666666666667, 'f1': 0.6813186813186815}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.043411455800836336, 'epoch': 18} Test Chinese: {'template': {'p': 0.9696969696969697, 'r': 0.5079365079365079, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6190476190476191, 'r': 0.022336769759450172, 'f1': 0.04311774461028192}, 'combined': 0.02874516307352128, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.0443247402847249, 'epoch': 18} Test Korean: {'template': {'p': 0.9696969696969697, 'r': 0.5079365079365079, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6190476190476191, 'r': 0.022336769759450172, 'f1': 0.04311774461028192}, 'combined': 0.02874516307352128, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5166666666666667, 'f1': 0.6813186813186815}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.043411455800836336, 'epoch': 18} Test Russian: {'template': {'p': 0.9701492537313433, 'r': 0.5158730158730159, 'f1': 0.6735751295336788}, 'slot': {'p': 0.6, 'r': 0.020618556701030927, 'f1': 0.03986710963455149}, 'combined': 0.026853493536226396, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 
0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 19 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:40:23.173578: step: 4/77, loss: 0.08355798572301865 2023-01-23 23:40:24.460894: step: 8/77, loss: 0.01619032397866249 2023-01-23 23:40:25.786314: step: 12/77, loss: 0.006282666698098183 2023-01-23 23:40:27.068568: step: 16/77, loss: 0.04044962674379349 2023-01-23 23:40:28.346102: step: 20/77, loss: 2.1754760837211506e-06 2023-01-23 23:40:29.657466: step: 24/77, loss: 0.001033884473145008 2023-01-23 23:40:31.006622: step: 28/77, loss: 0.0010141909588128328 2023-01-23 23:40:32.313836: step: 32/77, loss: 0.0029271808452904224 2023-01-23 23:40:33.611054: step: 36/77, loss: 1.6010548279155046e-05 2023-01-23 23:40:34.907110: step: 40/77, loss: 0.029112864285707474 2023-01-23 23:40:36.184747: step: 44/77, loss: 4.747176262753783e-06 2023-01-23 23:40:37.481829: step: 48/77, loss: 3.406681207707152e-05 2023-01-23 23:40:38.777914: step: 52/77, loss: 0.0006297418149188161 2023-01-23 23:40:40.085095: step: 56/77, loss: 5.055548172094859e-06 2023-01-23 23:40:41.370292: step: 60/77, loss: 4.45558616775088e-05 2023-01-23 23:40:42.727503: step: 64/77, loss: 1.1200027074664831e-05 2023-01-23 23:40:44.026283: step: 68/77, loss: 0.0011179175926372409 2023-01-23 23:40:45.352239: step: 72/77, loss: 8.404656546190381e-05 2023-01-23 23:40:46.688023: step: 76/77, loss: 0.0016348809003829956 2023-01-23 23:40:48.011577: step: 80/77, loss: 0.0011514124926179647 2023-01-23 23:40:49.299334: step: 84/77, loss: 0.04622327536344528 2023-01-23 23:40:50.594778: step: 88/77, loss: 0.01025646273046732 2023-01-23 23:40:51.952764: step: 92/77, loss: 8.574593084631488e-05 2023-01-23 23:40:53.325024: step: 96/77, loss: 0.0008810489089228213 2023-01-23 23:40:54.582175: step: 100/77, loss: 0.20770668983459473 2023-01-23 23:40:55.931537: step: 104/77, loss: 0.025355808436870575 2023-01-23 23:40:57.286479: step: 108/77, loss: 0.00010351461969548836 2023-01-23 23:40:58.591542: step: 112/77, loss: 0.0005154706886969507 2023-01-23 23:40:59.884920: step: 116/77, loss: 0.00017362600192427635 2023-01-23 23:41:01.187006: step: 120/77, loss: 4.7918918426148593e-05 2023-01-23 23:41:02.469207: step: 124/77, loss: 1.1120209819637239e-05 2023-01-23 23:41:03.770654: step: 128/77, loss: 5.7865377129928675e-06 2023-01-23 23:41:05.067049: step: 132/77, loss: 0.0011631065281108022 2023-01-23 23:41:06.324400: step: 136/77, loss: 0.0003213899035472423 2023-01-23 23:41:07.600240: step: 140/77, loss: 0.016531798988580704 2023-01-23 
23:41:08.912801: step: 144/77, loss: 0.0002846270508598536 2023-01-23 23:41:10.273088: step: 148/77, loss: 5.7959747209679335e-05 2023-01-23 23:41:11.634057: step: 152/77, loss: 0.001835025497712195 2023-01-23 23:41:12.994989: step: 156/77, loss: 7.920786447357386e-05 2023-01-23 23:41:14.348610: step: 160/77, loss: 0.03611599653959274 2023-01-23 23:41:15.667416: step: 164/77, loss: 0.024288857355713844 2023-01-23 23:41:16.985871: step: 168/77, loss: 0.00010517123155295849 2023-01-23 23:41:18.306657: step: 172/77, loss: 0.001092438818886876 2023-01-23 23:41:19.585302: step: 176/77, loss: 0.007640526629984379 2023-01-23 23:41:20.904408: step: 180/77, loss: 0.0007359448936767876 2023-01-23 23:41:22.245272: step: 184/77, loss: 1.0519408533582464e-05 2023-01-23 23:41:23.601619: step: 188/77, loss: 0.00011256665311520919 2023-01-23 23:41:24.936920: step: 192/77, loss: 0.0020343991927802563 2023-01-23 23:41:26.231638: step: 196/77, loss: 2.485430059095961e-06 2023-01-23 23:41:27.563579: step: 200/77, loss: 0.0034714534413069487 2023-01-23 23:41:28.901255: step: 204/77, loss: 0.17109277844429016 2023-01-23 23:41:30.189885: step: 208/77, loss: 0.002324912929907441 2023-01-23 23:41:31.492329: step: 212/77, loss: 0.013834369368851185 2023-01-23 23:41:32.831810: step: 216/77, loss: 0.004987460561096668 2023-01-23 23:41:34.114993: step: 220/77, loss: 0.00308143999427557 2023-01-23 23:41:35.428897: step: 224/77, loss: 0.00012807335588149726 2023-01-23 23:41:36.715372: step: 228/77, loss: 0.00022715324303135276 2023-01-23 23:41:38.023816: step: 232/77, loss: 0.00017162026779260486 2023-01-23 23:41:39.321645: step: 236/77, loss: 0.003213282907381654 2023-01-23 23:41:40.599572: step: 240/77, loss: 0.0026016314513981342 2023-01-23 23:41:41.918795: step: 244/77, loss: 0.0205234307795763 2023-01-23 23:41:43.218325: step: 248/77, loss: 0.0006555135478265584 2023-01-23 23:41:44.585084: step: 252/77, loss: 0.0015501710586249828 2023-01-23 23:41:45.862680: step: 256/77, loss: 0.07120595127344131 2023-01-23 23:41:47.197102: step: 260/77, loss: 0.04038810729980469 2023-01-23 23:41:48.468075: step: 264/77, loss: 0.005638515576720238 2023-01-23 23:41:49.789842: step: 268/77, loss: 0.014268961735069752 2023-01-23 23:41:51.121060: step: 272/77, loss: 2.5206656573573127e-05 2023-01-23 23:41:52.416482: step: 276/77, loss: 9.096400026464835e-05 2023-01-23 23:41:53.767613: step: 280/77, loss: 0.000966939958743751 2023-01-23 23:41:55.117671: step: 284/77, loss: 0.008116503246128559 2023-01-23 23:41:56.422833: step: 288/77, loss: 0.007118896581232548 2023-01-23 23:41:57.751581: step: 292/77, loss: 2.1705160179408267e-05 2023-01-23 23:41:59.094754: step: 296/77, loss: 0.0001560598029755056 2023-01-23 23:42:00.365451: step: 300/77, loss: 9.638090705266222e-05 2023-01-23 23:42:01.679763: step: 304/77, loss: 0.0005366081604734063 2023-01-23 23:42:02.990338: step: 308/77, loss: 0.0001767091453075409 2023-01-23 23:42:04.251865: step: 312/77, loss: 4.1324805351905525e-05 2023-01-23 23:42:05.536226: step: 316/77, loss: 0.0008603700553067029 2023-01-23 23:42:06.829314: step: 320/77, loss: 9.919640433508903e-05 2023-01-23 23:42:08.166752: step: 324/77, loss: 0.0005278933676891029 2023-01-23 23:42:09.469937: step: 328/77, loss: 0.00010486481914995238 2023-01-23 23:42:10.850294: step: 332/77, loss: 0.0002847413707058877 2023-01-23 23:42:12.152478: step: 336/77, loss: 4.130376692046411e-05 2023-01-23 23:42:13.461857: step: 340/77, loss: 0.004138815216720104 2023-01-23 23:42:14.779886: step: 344/77, loss: 1.7732038486428792e-06 2023-01-23 
23:42:16.107851: step: 348/77, loss: 0.0001266787585336715 2023-01-23 23:42:17.419109: step: 352/77, loss: 0.0001363552873954177 2023-01-23 23:42:18.747472: step: 356/77, loss: 0.0012341360561549664 2023-01-23 23:42:20.096324: step: 360/77, loss: 0.00014551937056239694 2023-01-23 23:42:21.464145: step: 364/77, loss: 0.0005572434747591615 2023-01-23 23:42:22.828289: step: 368/77, loss: 0.00889385025948286 2023-01-23 23:42:24.102352: step: 372/77, loss: 5.846058229508344e-06 2023-01-23 23:42:25.374842: step: 376/77, loss: 0.00043951894622296095 2023-01-23 23:42:26.660730: step: 380/77, loss: 0.001410671859048307 2023-01-23 23:42:27.969470: step: 384/77, loss: 0.007698127068579197 2023-01-23 23:42:29.254899: step: 388/77, loss: 0.00037329865153878927 ================================================== Loss: 0.010 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.0443247402847249, 'epoch': 19} Test Chinese: {'template': {'p': 0.9420289855072463, 'r': 0.5158730158730159, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.01804123711340206, 'f1': 0.03499999999999999}, 'combined': 0.023333333333333324, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 19} Test Korean: {'template': {'p': 0.9420289855072463, 'r': 0.5158730158730159, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.01804123711340206, 'f1': 0.035029190992493735}, 'combined': 0.023352793994995822, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.0443247402847249, 'epoch': 19} Test Russian: {'template': {'p': 0.9420289855072463, 'r': 0.5158730158730159, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5757575757575758, 'r': 0.01632302405498282, 'f1': 0.031746031746031744}, 'combined': 0.021164021164021163, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 19} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 20 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:44:10.987719: step: 4/77, loss: 0.002364807529374957 2023-01-23 23:44:12.294586: step: 8/77, loss: 0.0034368294291198254 2023-01-23 23:44:13.585698: step: 12/77, loss: 1.7614825992495753e-05 2023-01-23 23:44:14.851583: step: 16/77, loss: 2.296705133630894e-05 2023-01-23 23:44:16.166754: step: 20/77, loss: 1.5452326351805823e-06 2023-01-23 23:44:17.433192: step: 24/77, loss: 0.0002031271142186597 2023-01-23 23:44:18.684633: step: 28/77, loss: 0.00014223124890122563 2023-01-23 23:44:19.974668: step: 32/77, loss: 0.0011827124981209636 2023-01-23 23:44:21.316313: step: 36/77, loss: 0.002860462525859475 2023-01-23 23:44:22.608466: step: 40/77, loss: 5.542115104617551e-06 2023-01-23 23:44:23.947533: step: 44/77, loss: 0.009848167188465595 2023-01-23 23:44:25.232309: step: 48/77, loss: 0.008947617374360561 2023-01-23 23:44:26.563728: step: 52/77, loss: 0.00013507247786037624 2023-01-23 23:44:27.871836: step: 56/77, loss: 0.007452580612152815 2023-01-23 23:44:29.211442: step: 60/77, loss: 1.636518572922796e-05 2023-01-23 23:44:30.463073: step: 64/77, loss: 0.00012901745503768325 2023-01-23 23:44:31.788643: step: 68/77, loss: 7.399632158922032e-05 2023-01-23 23:44:33.083294: step: 72/77, loss: 0.020840534940361977 2023-01-23 23:44:34.422748: step: 76/77, loss: 0.13874861598014832 2023-01-23 23:44:35.711358: step: 80/77, loss: 0.0004687589535024017 2023-01-23 23:44:37.035420: step: 84/77, loss: 0.00034775465610437095 2023-01-23 23:44:38.326549: step: 88/77, loss: 0.0005079308757558465 2023-01-23 23:44:39.634941: step: 92/77, loss: 0.01079073827713728 2023-01-23 23:44:40.925946: step: 96/77, loss: 0.0005955763044767082 2023-01-23 23:44:42.192545: step: 100/77, loss: 0.007185032125562429 2023-01-23 23:44:43.455319: step: 104/77, loss: 0.0006630965508520603 2023-01-23 23:44:44.851915: step: 108/77, loss: 6.602683424716815e-05 2023-01-23 23:44:46.137105: step: 112/77, loss: 4.022960638394579e-05 2023-01-23 23:44:47.426427: step: 116/77, loss: 0.00010086910333484411 2023-01-23 23:44:48.796211: step: 120/77, loss: 0.01682485081255436 2023-01-23 23:44:50.086382: step: 124/77, loss: 3.7639067613781663e-06 2023-01-23 23:44:51.421425: step: 128/77, loss: 3.7704110582126305e-05 
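The command echoed at the top of each epoch trains with --batch_size 10 and --accumulate_step 4, i.e. gradients from four mini-batches (40 examples) are accumulated per optimizer update, which is also consistent with the step counter in these entries advancing in increments of 4. Two learning rates are used: 2e-5 (--xlmr_learning_rate) for the XLM-R encoder and 2e-4 (--learning_rate) for the remaining parameters. A minimal sketch of that pattern follows; the optimizer choice (AdamW), the attribute names and the loop structure are assumptions, not taken from train.py:

import torch

def build_optimizer(model, head_params):
    # Two parameter groups mirroring --xlmr_learning_rate 2e-5 and --learning_rate 2e-4.
    return torch.optim.AdamW([
        {"params": model.xlmr.parameters(), "lr": 2e-5},
        {"params": head_params, "lr": 2e-4},
    ])

def train_epoch(model, loader, optimizer, accumulate_step=4):
    model.train()
    optimizer.zero_grad()
    for i, batch in enumerate(loader, start=1):
        loss = model(**batch)                # assumes the model returns a scalar loss
        (loss / accumulate_step).backward()  # scale so the accumulated gradient is an average
        if i % accumulate_step == 0:
            optimizer.step()
            optimizer.zero_grad()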
2023-01-23 23:44:52.730006: step: 132/77, loss: 0.0004095855401828885 2023-01-23 23:44:54.028400: step: 136/77, loss: 0.002314388519152999 2023-01-23 23:44:55.293274: step: 140/77, loss: 0.001917900750413537 2023-01-23 23:44:56.586295: step: 144/77, loss: 0.00695814611390233 2023-01-23 23:44:57.852417: step: 148/77, loss: 0.00012202710058772936 2023-01-23 23:44:59.169476: step: 152/77, loss: 0.0001240679412148893 2023-01-23 23:45:00.493659: step: 156/77, loss: 5.034492915001465e-06 2023-01-23 23:45:01.822893: step: 160/77, loss: 0.00017982257122639567 2023-01-23 23:45:03.160068: step: 164/77, loss: 0.006036675069481134 2023-01-23 23:45:04.448833: step: 168/77, loss: 0.0010000867769122124 2023-01-23 23:45:05.746205: step: 172/77, loss: 0.00018073295359499753 2023-01-23 23:45:07.089549: step: 176/77, loss: 4.4574870116775855e-05 2023-01-23 23:45:08.390362: step: 180/77, loss: 0.05173643305897713 2023-01-23 23:45:09.737509: step: 184/77, loss: 0.0015796958468854427 2023-01-23 23:45:11.072473: step: 188/77, loss: 0.0010724187595769763 2023-01-23 23:45:12.374630: step: 192/77, loss: 0.004048222675919533 2023-01-23 23:45:13.660620: step: 196/77, loss: 0.009710166603326797 2023-01-23 23:45:14.964342: step: 200/77, loss: 0.028478579595685005 2023-01-23 23:45:16.276849: step: 204/77, loss: 7.516022014897317e-05 2023-01-23 23:45:17.610756: step: 208/77, loss: 0.0027792761102318764 2023-01-23 23:45:18.939253: step: 212/77, loss: 8.034476195462048e-05 2023-01-23 23:45:20.284402: step: 216/77, loss: 9.811624295252841e-06 2023-01-23 23:45:21.562913: step: 220/77, loss: 0.003008501837030053 2023-01-23 23:45:22.889076: step: 224/77, loss: 1.222667378897313e-05 2023-01-23 23:45:24.239180: step: 228/77, loss: 0.0003688423312269151 2023-01-23 23:45:25.543490: step: 232/77, loss: 0.002369387773796916 2023-01-23 23:45:26.875935: step: 236/77, loss: 0.0002195181732531637 2023-01-23 23:45:28.204530: step: 240/77, loss: 0.005662532523274422 2023-01-23 23:45:29.493694: step: 244/77, loss: 1.3159821719455067e-05 2023-01-23 23:45:30.850266: step: 248/77, loss: 0.007855149917304516 2023-01-23 23:45:32.168365: step: 252/77, loss: 0.00671668816357851 2023-01-23 23:45:33.473387: step: 256/77, loss: 0.02250758931040764 2023-01-23 23:45:34.777254: step: 260/77, loss: 0.00010617719090078026 2023-01-23 23:45:36.089051: step: 264/77, loss: 0.019556432962417603 2023-01-23 23:45:37.398760: step: 268/77, loss: 0.001554210321046412 2023-01-23 23:45:38.717895: step: 272/77, loss: 9.581274298398057e-07 2023-01-23 23:45:40.094854: step: 276/77, loss: 0.0031252556946128607 2023-01-23 23:45:41.347383: step: 280/77, loss: 0.022521525621414185 2023-01-23 23:45:42.643658: step: 284/77, loss: 0.005696204490959644 2023-01-23 23:45:43.985906: step: 288/77, loss: 0.00011008291767211631 2023-01-23 23:45:45.278520: step: 292/77, loss: 9.962310286937281e-05 2023-01-23 23:45:46.566217: step: 296/77, loss: 0.008221546187996864 2023-01-23 23:45:47.817721: step: 300/77, loss: 8.898541273083538e-05 2023-01-23 23:45:49.123388: step: 304/77, loss: 3.8327845686580986e-05 2023-01-23 23:45:50.414913: step: 308/77, loss: 0.003969868179410696 2023-01-23 23:45:51.721497: step: 312/77, loss: 0.08264435082674026 2023-01-23 23:45:53.019061: step: 316/77, loss: 0.05371001735329628 2023-01-23 23:45:54.306574: step: 320/77, loss: 5.841174584020337e-07 2023-01-23 23:45:55.605792: step: 324/77, loss: 6.554293941007927e-05 2023-01-23 23:45:56.885544: step: 328/77, loss: 9.15273412829265e-05 2023-01-23 23:45:58.212552: step: 332/77, loss: 0.0001446074602426961 
2023-01-23 23:45:59.509305: step: 336/77, loss: 0.0006172170978970826 2023-01-23 23:46:00.783651: step: 340/77, loss: 0.0031737142708152533 2023-01-23 23:46:02.091332: step: 344/77, loss: 0.00707965437322855 2023-01-23 23:46:03.408282: step: 348/77, loss: 5.047888771514408e-05 2023-01-23 23:46:04.735254: step: 352/77, loss: 7.201795961009338e-05 2023-01-23 23:46:06.016285: step: 356/77, loss: 0.00013962757657282054 2023-01-23 23:46:07.307486: step: 360/77, loss: 0.006290529388934374 2023-01-23 23:46:08.645260: step: 364/77, loss: 0.00016902160132303834 2023-01-23 23:46:09.979391: step: 368/77, loss: 0.06438884884119034 2023-01-23 23:46:11.262552: step: 372/77, loss: 4.708188043878181e-06 2023-01-23 23:46:12.608418: step: 376/77, loss: 0.00014791273861192167 2023-01-23 23:46:13.910121: step: 380/77, loss: 7.04221602063626e-05 2023-01-23 23:46:15.223905: step: 384/77, loss: 7.290163193829358e-05 2023-01-23 23:46:16.513311: step: 388/77, loss: 7.028390245977789e-05 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5555555555555556, 'f1': 0.7000000000000001}, 'slot': {'p': 0.5897435897435898, 'r': 0.019759450171821305, 'f1': 0.03823773898586866}, 'combined': 0.026766417290108063, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Korean: {'template': {'p': 0.9466666666666667, 'r': 0.5634920634920635, 'f1': 0.7064676616915423}, 'slot': {'p': 0.6052631578947368, 'r': 0.019759450171821305, 'f1': 0.038269550748752074}, 'combined': 0.027036200031456688, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Russian: {'template': {'p': 0.9452054794520548, 'r': 0.5476190476190477, 'f1': 0.6934673366834171}, 'slot': {'p': 0.5675675675675675, 'r': 0.01804123711340206, 'f1': 0.03497085761865112}, 'combined': 0.024251147494340975, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 
'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 21 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:47:58.524012: step: 4/77, loss: 0.0055143265053629875 2023-01-23 23:47:59.834618: step: 8/77, loss: 0.02832634560763836 2023-01-23 23:48:01.123417: step: 12/77, loss: 0.00014384837413672358 2023-01-23 23:48:02.496293: step: 16/77, loss: 0.0036363855469971895 2023-01-23 23:48:03.832500: step: 20/77, loss: 0.0075181955471634865 2023-01-23 23:48:05.129752: step: 24/77, loss: 0.00023805254022590816 2023-01-23 23:48:06.424088: step: 28/77, loss: 0.003992673009634018 2023-01-23 23:48:07.725557: step: 32/77, loss: 0.0005978421540930867 2023-01-23 23:48:09.070319: step: 36/77, loss: 1.4125964753475273e-06 2023-01-23 23:48:10.394206: step: 40/77, loss: 0.0005768768023699522 2023-01-23 23:48:11.696198: step: 44/77, loss: 8.522043935954571e-05 2023-01-23 23:48:12.994922: step: 48/77, loss: 0.0004115917254239321 2023-01-23 23:48:14.321189: step: 52/77, loss: 7.993353392521385e-06 2023-01-23 23:48:15.577417: step: 56/77, loss: 8.447294385405257e-05 2023-01-23 23:48:16.921061: step: 60/77, loss: 3.577906682039611e-05 2023-01-23 23:48:18.245144: step: 64/77, loss: 0.0001947486452991143 2023-01-23 23:48:19.556286: step: 68/77, loss: 0.0004472629225347191 2023-01-23 23:48:20.883319: step: 72/77, loss: 0.00015355259529314935 2023-01-23 23:48:22.239073: step: 76/77, loss: 0.0003373160434421152 2023-01-23 23:48:23.503313: step: 80/77, loss: 9.838744153967127e-05 2023-01-23 23:48:24.815303: step: 84/77, loss: 1.6177429642993957e-05 2023-01-23 23:48:26.089780: step: 88/77, loss: 0.004022568464279175 2023-01-23 23:48:27.340171: step: 92/77, loss: 3.932239269488491e-05 2023-01-23 23:48:28.655618: step: 96/77, loss: 0.001530507463030517 2023-01-23 23:48:29.937998: step: 100/77, loss: 0.003452786011621356 2023-01-23 23:48:31.242079: step: 104/77, loss: 0.005762745160609484 2023-01-23 23:48:32.536953: step: 108/77, loss: 0.004810965154320002 2023-01-23 23:48:33.847666: step: 112/77, loss: 0.00015320284001063555 2023-01-23 
23:48:35.143461: step: 116/77, loss: 5.8151086705038324e-05 2023-01-23 23:48:36.440774: step: 120/77, loss: 4.310378244554158e-06 2023-01-23 23:48:37.769678: step: 124/77, loss: 2.6015195544459857e-06 2023-01-23 23:48:39.096841: step: 128/77, loss: 0.000375100236851722 2023-01-23 23:48:40.407914: step: 132/77, loss: 0.05096989497542381 2023-01-23 23:48:41.758871: step: 136/77, loss: 0.00031754543306306005 2023-01-23 23:48:43.054186: step: 140/77, loss: 0.0001472402800573036 2023-01-23 23:48:44.397939: step: 144/77, loss: 0.0007970663718879223 2023-01-23 23:48:45.752569: step: 148/77, loss: 0.013219388201832771 2023-01-23 23:48:47.016455: step: 152/77, loss: 0.00012928983778692782 2023-01-23 23:48:48.324870: step: 156/77, loss: 3.786048910114914e-05 2023-01-23 23:48:49.613885: step: 160/77, loss: 1.0483473488420714e-05 2023-01-23 23:48:50.918225: step: 164/77, loss: 3.6924047890352085e-05 2023-01-23 23:48:52.252552: step: 168/77, loss: 7.85590509622125e-06 2023-01-23 23:48:53.584853: step: 172/77, loss: 0.043523602187633514 2023-01-23 23:48:54.909541: step: 176/77, loss: 0.0007601756369695067 2023-01-23 23:48:56.242472: step: 180/77, loss: 0.024111945182085037 2023-01-23 23:48:57.569847: step: 184/77, loss: 6.514093911391683e-06 2023-01-23 23:48:58.915993: step: 188/77, loss: 0.0002271834819111973 2023-01-23 23:49:00.273466: step: 192/77, loss: 0.0009495330159552395 2023-01-23 23:49:01.620638: step: 196/77, loss: 1.7016720903484384e-06 2023-01-23 23:49:02.957728: step: 200/77, loss: 9.455228428123519e-05 2023-01-23 23:49:04.281783: step: 204/77, loss: 1.8074692889058497e-06 2023-01-23 23:49:05.625946: step: 208/77, loss: 5.894151854590746e-06 2023-01-23 23:49:06.916845: step: 212/77, loss: 2.3507000150857493e-05 2023-01-23 23:49:08.187720: step: 216/77, loss: 1.6973621313809417e-05 2023-01-23 23:49:09.465132: step: 220/77, loss: 0.022141138091683388 2023-01-23 23:49:10.764162: step: 224/77, loss: 2.8701071641989984e-05 2023-01-23 23:49:12.053077: step: 228/77, loss: 3.997721432824619e-05 2023-01-23 23:49:13.318604: step: 232/77, loss: 3.2146857847692445e-05 2023-01-23 23:49:14.650525: step: 236/77, loss: 1.425421032763552e-05 2023-01-23 23:49:15.948348: step: 240/77, loss: 0.00028763728914782405 2023-01-23 23:49:17.286700: step: 244/77, loss: 0.0006647562840953469 2023-01-23 23:49:18.585153: step: 248/77, loss: 0.0003965311625506729 2023-01-23 23:49:19.925902: step: 252/77, loss: 3.520384052535519e-05 2023-01-23 23:49:21.199409: step: 256/77, loss: 0.0005282217171043158 2023-01-23 23:49:22.487558: step: 260/77, loss: 7.094180546118878e-06 2023-01-23 23:49:23.803802: step: 264/77, loss: 0.0002328925475012511 2023-01-23 23:49:25.134269: step: 268/77, loss: 7.204769644886255e-05 2023-01-23 23:49:26.402361: step: 272/77, loss: 0.0006943023763597012 2023-01-23 23:49:27.687428: step: 276/77, loss: 0.021600721403956413 2023-01-23 23:49:28.948714: step: 280/77, loss: 3.468366776360199e-05 2023-01-23 23:49:30.244833: step: 284/77, loss: 0.0017295520519837737 2023-01-23 23:49:31.558388: step: 288/77, loss: 6.080401362851262e-05 2023-01-23 23:49:32.953524: step: 292/77, loss: 0.004709943663328886 2023-01-23 23:49:34.335733: step: 296/77, loss: 0.04869398847222328 2023-01-23 23:49:35.633847: step: 300/77, loss: 0.000548431882634759 2023-01-23 23:49:36.922445: step: 304/77, loss: 0.00472796568647027 2023-01-23 23:49:38.242392: step: 308/77, loss: 0.011098390445113182 2023-01-23 23:49:39.592752: step: 312/77, loss: 0.00011605924373725429 2023-01-23 23:49:40.893555: step: 316/77, loss: 
0.0022660826798528433 2023-01-23 23:49:42.207142: step: 320/77, loss: 0.0375262089073658 2023-01-23 23:49:43.506868: step: 324/77, loss: 0.0008253601845353842 2023-01-23 23:49:44.797526: step: 328/77, loss: 0.00011603911843849346 2023-01-23 23:49:46.126606: step: 332/77, loss: 4.261704589225701e-07 2023-01-23 23:49:47.417799: step: 336/77, loss: 0.00019668148888740689 2023-01-23 23:49:48.787490: step: 340/77, loss: 0.05273066461086273 2023-01-23 23:49:50.073738: step: 344/77, loss: 7.742470188532025e-05 2023-01-23 23:49:51.323793: step: 348/77, loss: 2.0339364255050896e-06 2023-01-23 23:49:52.658604: step: 352/77, loss: 1.1309716683172155e-06 2023-01-23 23:49:53.955645: step: 356/77, loss: 1.6242236711150326e-07 2023-01-23 23:49:55.269374: step: 360/77, loss: 0.06755227595567703 2023-01-23 23:49:56.576109: step: 364/77, loss: 3.2346313219022704e-06 2023-01-23 23:49:57.893743: step: 368/77, loss: 1.4498581549560186e-05 2023-01-23 23:49:59.152658: step: 372/77, loss: 6.510221282951534e-06 2023-01-23 23:50:00.470140: step: 376/77, loss: 0.010036947205662727 2023-01-23 23:50:01.787554: step: 380/77, loss: 2.9802308176840597e-08 2023-01-23 23:50:03.042954: step: 384/77, loss: 2.184322738685296e-06 2023-01-23 23:50:04.382339: step: 388/77, loss: 0.02168990485370159 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21} Test Chinese: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6666666666666666, 'r': 0.01718213058419244, 'f1': 0.03350083752093803}, 'combined': 0.021863704487349027, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6333333333333333, 'r': 0.01632302405498282, 'f1': 0.031825795644891124}, 'combined': 0.02077051926298157, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21} Test Russian: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6296296296296297, 'r': 0.014604810996563574, 'f1': 0.028547439126784216}, 'combined': 0.018630960272217063, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 21} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} New best korean model... 
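For reference, the "combined" figure in each evaluation block is consistent with the product of the template F1 and the slot F1, where each F1 is the usual harmonic mean of precision and recall; e.g. the epoch-21 Korean dev scores just above give 0.6667 x 0.0603 ≈ 0.0402. A small helper that reproduces those numbers (the function names are illustrative, not from train.py):

def f1(p: float, r: float) -> float:
    # Harmonic mean of precision and recall (0 when both are 0).
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined(template_f1: float, slot_f1: float) -> float:
    # For the entries checked here, the logged 'combined' score equals template_f1 * slot_f1.
    return template_f1 * slot_f1

# Epoch-21 Korean dev scores from the log:
t = f1(1.0, 0.5)                                   # -> 0.6666666666666666
s = f1(0.4857142857142857, 0.03213610586011342)    # -> 0.06028368794326241
print(combined(t, s))                              # -> 0.04018912529550827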
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6333333333333333, 'r': 0.01632302405498282, 'f1': 0.031825795644891124}, 'combined': 0.02077051926298157, 'epoch': 21} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 22 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:51:54.124594: step: 4/77, loss: 0.0003420588036533445 2023-01-23 23:51:55.481297: step: 8/77, loss: 0.00033664965303614736 2023-01-23 23:51:56.786539: step: 12/77, loss: 0.015224933624267578 2023-01-23 23:51:58.109301: step: 16/77, loss: 0.008086070418357849 2023-01-23 23:51:59.396319: step: 20/77, loss: 1.0142309292859863e-05 2023-01-23 23:52:00.756468: step: 24/77, loss: 3.63995241059456e-06 2023-01-23 23:52:02.032429: step: 28/77, loss: 0.0006053458782844245 2023-01-23 23:52:03.315874: step: 32/77, loss: 9.357833050671616e-07 2023-01-23 23:52:04.591639: step: 36/77, loss: 0.0037245291750878096 2023-01-23 23:52:05.894941: step: 40/77, loss: 3.310883403173648e-05 2023-01-23 23:52:07.198929: step: 44/77, loss: 5.7810040743788704e-05 2023-01-23 23:52:08.517443: step: 48/77, loss: 0.02020496316254139 2023-01-23 23:52:09.814940: step: 52/77, loss: 2.893852797569707e-05 2023-01-23 23:52:11.113747: step: 56/77, loss: 2.614892309793504e-06 2023-01-23 23:52:12.398456: step: 60/77, loss: 6.359125109156594e-05 2023-01-23 23:52:13.736678: step: 64/77, loss: 5.637254798784852e-05 2023-01-23 23:52:14.997008: step: 68/77, loss: 0.001058459049090743 2023-01-23 23:52:16.321478: step: 72/77, loss: 0.0012626081006601453 2023-01-23 23:52:17.624003: step: 76/77, loss: 1.6391270563076432e-08 2023-01-23 
23:52:18.939388: step: 80/77, loss: 3.704725168063305e-05 2023-01-23 23:52:20.266359: step: 84/77, loss: 1.2144018910476007e-06 2023-01-23 23:52:21.563202: step: 88/77, loss: 0.0022035983856767416 2023-01-23 23:52:22.864072: step: 92/77, loss: 3.0500468710670248e-06 2023-01-23 23:52:24.203863: step: 96/77, loss: 0.0008798028575256467 2023-01-23 23:52:25.483153: step: 100/77, loss: 0.0002495271619409323 2023-01-23 23:52:26.827896: step: 104/77, loss: 0.00026491464814171195 2023-01-23 23:52:28.092555: step: 108/77, loss: 0.0005632839747704566 2023-01-23 23:52:29.397369: step: 112/77, loss: 0.0002990629873238504 2023-01-23 23:52:30.658978: step: 116/77, loss: 0.02526215650141239 2023-01-23 23:52:31.963909: step: 120/77, loss: 0.0003323222335893661 2023-01-23 23:52:33.314820: step: 124/77, loss: 0.01125109102576971 2023-01-23 23:52:34.557793: step: 128/77, loss: 8.091188874459476e-07 2023-01-23 23:52:35.866030: step: 132/77, loss: 4.3213336908820565e-08 2023-01-23 23:52:37.180623: step: 136/77, loss: 0.034145377576351166 2023-01-23 23:52:38.475106: step: 140/77, loss: 3.913377440767363e-05 2023-01-23 23:52:39.774878: step: 144/77, loss: 0.00012151235569035634 2023-01-23 23:52:41.082748: step: 148/77, loss: 1.001347527562757e-06 2023-01-23 23:52:42.367495: step: 152/77, loss: 0.002642788225784898 2023-01-23 23:52:43.658072: step: 156/77, loss: 1.7642250895733014e-06 2023-01-23 23:52:44.974315: step: 160/77, loss: 1.8088079741573893e-05 2023-01-23 23:52:46.289899: step: 164/77, loss: 0.04336974024772644 2023-01-23 23:52:47.618737: step: 168/77, loss: 9.991742263082415e-05 2023-01-23 23:52:48.984943: step: 172/77, loss: 0.029939396306872368 2023-01-23 23:52:50.300812: step: 176/77, loss: 0.002172111766412854 2023-01-23 23:52:51.610488: step: 180/77, loss: 0.021324358880519867 2023-01-23 23:52:52.930982: step: 184/77, loss: 0.09488627314567566 2023-01-23 23:52:54.225472: step: 188/77, loss: 0.04166953265666962 2023-01-23 23:52:55.566547: step: 192/77, loss: 1.5445612007169984e-05 2023-01-23 23:52:56.938564: step: 196/77, loss: 0.0007762728491798043 2023-01-23 23:52:58.224582: step: 200/77, loss: 2.6306921427021734e-05 2023-01-23 23:52:59.518001: step: 204/77, loss: 2.7251383016846376e-06 2023-01-23 23:53:00.846083: step: 208/77, loss: 0.001086265780031681 2023-01-23 23:53:02.119382: step: 212/77, loss: 0.1006154865026474 2023-01-23 23:53:03.468358: step: 216/77, loss: 0.00856444425880909 2023-01-23 23:53:04.796457: step: 220/77, loss: 0.002317016711458564 2023-01-23 23:53:06.068147: step: 224/77, loss: 0.00038656831020489335 2023-01-23 23:53:07.373861: step: 228/77, loss: 3.667514829430729e-05 2023-01-23 23:53:08.658413: step: 232/77, loss: 0.00040591179276816547 2023-01-23 23:53:09.972653: step: 236/77, loss: 0.00016738964768592268 2023-01-23 23:53:11.208594: step: 240/77, loss: 6.972724804654717e-05 2023-01-23 23:53:12.533489: step: 244/77, loss: 0.00665863323956728 2023-01-23 23:53:13.890923: step: 248/77, loss: 0.013775674626231194 2023-01-23 23:53:15.210945: step: 252/77, loss: 0.003292257897555828 2023-01-23 23:53:16.489662: step: 256/77, loss: 3.42726487190248e-08 2023-01-23 23:53:17.805437: step: 260/77, loss: 0.14192891120910645 2023-01-23 23:53:19.104171: step: 264/77, loss: 5.758352926932275e-06 2023-01-23 23:53:20.451251: step: 268/77, loss: 6.13920974501525e-07 2023-01-23 23:53:21.783999: step: 272/77, loss: 0.0001108912329073064 2023-01-23 23:53:23.079842: step: 276/77, loss: 2.2955689928494394e-05 2023-01-23 23:53:24.352925: step: 280/77, loss: 6.733203190378845e-05 2023-01-23 
23:53:25.653133: step: 284/77, loss: 0.008186022751033306 2023-01-23 23:53:26.938217: step: 288/77, loss: 0.0001187180751003325 2023-01-23 23:53:28.232394: step: 292/77, loss: 1.2390642041282263e-05 2023-01-23 23:53:29.528194: step: 296/77, loss: 0.0025930129922926426 2023-01-23 23:53:30.808770: step: 300/77, loss: 0.007388572208583355 2023-01-23 23:53:32.166510: step: 304/77, loss: 0.0003645633696578443 2023-01-23 23:53:33.464759: step: 308/77, loss: 0.00046210913569666445 2023-01-23 23:53:34.708041: step: 312/77, loss: 4.128644650336355e-05 2023-01-23 23:53:36.034501: step: 316/77, loss: 0.0057842060923576355 2023-01-23 23:53:37.337641: step: 320/77, loss: 8.53521196404472e-05 2023-01-23 23:53:38.650116: step: 324/77, loss: 0.005444152280688286 2023-01-23 23:53:39.927401: step: 328/77, loss: 0.0006179081392474473 2023-01-23 23:53:41.234679: step: 332/77, loss: 0.0002588433271739632 2023-01-23 23:53:42.531297: step: 336/77, loss: 0.06355436146259308 2023-01-23 23:53:43.857101: step: 340/77, loss: 0.002731876913458109 2023-01-23 23:53:45.222327: step: 344/77, loss: 0.011247079819440842 2023-01-23 23:53:46.502420: step: 348/77, loss: 7.9117133282125e-05 2023-01-23 23:53:47.777505: step: 352/77, loss: 1.2233464985911269e-05 2023-01-23 23:53:49.034919: step: 356/77, loss: 1.663708644628059e-05 2023-01-23 23:53:50.330673: step: 360/77, loss: 0.00011308961984468624 2023-01-23 23:53:51.632896: step: 364/77, loss: 6.027103154337965e-05 2023-01-23 23:53:52.970554: step: 368/77, loss: 0.03185882419347763 2023-01-23 23:53:54.241902: step: 372/77, loss: 5.714062808692688e-06 2023-01-23 23:53:55.582165: step: 376/77, loss: 0.04134390130639076 2023-01-23 23:53:56.850404: step: 380/77, loss: 0.0028718234971165657 2023-01-23 23:53:58.161557: step: 384/77, loss: 8.937079655879643e-06 2023-01-23 23:53:59.457820: step: 388/77, loss: 0.0002531110367272049 ================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 22} Test Chinese: {'template': {'p': 0.9583333333333334, 'r': 0.5476190476190477, 'f1': 0.696969696969697}, 'slot': {'p': 0.6285714285714286, 'r': 0.018900343642611683, 'f1': 0.03669724770642201}, 'combined': 0.02557686961356686, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Korean: {'template': {'p': 0.9452054794520548, 'r': 0.5476190476190477, 'f1': 0.6934673366834171}, 'slot': {'p': 0.5789473684210527, 'r': 0.018900343642611683, 'f1': 0.03660565723793677}, 'combined': 0.02538482763233806, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Russian: {'template': {'p': 0.9577464788732394, 'r': 0.5396825396825397, 'f1': 0.6903553299492384}, 'slot': {'p': 0.5428571428571428, 'r': 0.01632302405498282, 'f1': 0.0316930775646372}, 'combined': 0.021879485019241918, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 
'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6333333333333333, 'r': 0.01632302405498282, 'f1': 0.031825795644891124}, 'combined': 0.02077051926298157, 'epoch': 21} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 23 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:55:41.367678: step: 4/77, loss: 0.025402506813406944 2023-01-23 23:55:42.644072: step: 8/77, loss: 1.4856110510663711e-06 2023-01-23 23:55:43.976187: step: 12/77, loss: 0.004074436612427235 2023-01-23 23:55:45.245726: step: 16/77, loss: 0.0014559680130332708 2023-01-23 23:55:46.586929: step: 20/77, loss: 0.00022927409736439586 2023-01-23 23:55:47.836080: step: 24/77, loss: 0.0029677823185920715 2023-01-23 23:55:49.164591: step: 28/77, loss: 0.0007572559406980872 2023-01-23 23:55:50.507347: step: 32/77, loss: 0.03823195397853851 2023-01-23 23:55:51.789189: step: 36/77, loss: 0.008422516286373138 2023-01-23 23:55:53.052878: step: 40/77, loss: 6.584785296581686e-05 2023-01-23 23:55:54.382606: step: 44/77, loss: 1.0373501027061138e-05 2023-01-23 23:55:55.686232: step: 48/77, loss: 0.008303694427013397 2023-01-23 23:55:56.991991: step: 52/77, loss: 0.0062025561928749084 2023-01-23 23:55:58.269330: step: 56/77, loss: 0.06150413677096367 2023-01-23 23:55:59.581660: step: 60/77, loss: 0.0009686066769063473 2023-01-23 23:56:00.862124: step: 64/77, loss: 2.03990521185915e-06 2023-01-23 23:56:02.206305: 
step: 68/77, loss: 6.0856546042487025e-05 2023-01-23 23:56:03.523031: step: 72/77, loss: 1.973300641111564e-05 2023-01-23 23:56:04.802111: step: 76/77, loss: 2.571762706793379e-06 2023-01-23 23:56:06.096594: step: 80/77, loss: 0.01592273637652397 2023-01-23 23:56:07.399703: step: 84/77, loss: 0.012238798663020134 2023-01-23 23:56:08.707249: step: 88/77, loss: 2.0712558068680664e-07 2023-01-23 23:56:10.011105: step: 92/77, loss: 0.02119985967874527 2023-01-23 23:56:11.286389: step: 96/77, loss: 3.281081262684893e-06 2023-01-23 23:56:12.586485: step: 100/77, loss: 8.314575552503811e-07 2023-01-23 23:56:13.902134: step: 104/77, loss: 0.008776322938501835 2023-01-23 23:56:15.148931: step: 108/77, loss: 0.00039585179183632135 2023-01-23 23:56:16.476464: step: 112/77, loss: 7.510131467824976e-07 2023-01-23 23:56:17.793093: step: 116/77, loss: 0.005819275509566069 2023-01-23 23:56:19.060824: step: 120/77, loss: 0.0008825076511129737 2023-01-23 23:56:20.419261: step: 124/77, loss: 0.0032741157338023186 2023-01-23 23:56:21.705893: step: 128/77, loss: 0.00010210295295109972 2023-01-23 23:56:23.047992: step: 132/77, loss: 0.03434651345014572 2023-01-23 23:56:24.406319: step: 136/77, loss: 6.918517465237528e-05 2023-01-23 23:56:25.710233: step: 140/77, loss: 0.002537723630666733 2023-01-23 23:56:27.032819: step: 144/77, loss: 0.004278878215700388 2023-01-23 23:56:28.317575: step: 148/77, loss: 0.03354043513536453 2023-01-23 23:56:29.599655: step: 152/77, loss: 0.00040915823774412274 2023-01-23 23:56:30.961166: step: 156/77, loss: 3.6666804135165876e-06 2023-01-23 23:56:32.316382: step: 160/77, loss: 1.1175860947787442e-07 2023-01-23 23:56:33.676544: step: 164/77, loss: 2.7298178792989347e-06 2023-01-23 23:56:35.032605: step: 168/77, loss: 0.013857110403478146 2023-01-23 23:56:36.360953: step: 172/77, loss: 1.5452174011443276e-06 2023-01-23 23:56:37.639173: step: 176/77, loss: 0.009256785735487938 2023-01-23 23:56:38.906143: step: 180/77, loss: 8.564258678234182e-06 2023-01-23 23:56:40.244207: step: 184/77, loss: 0.07777054607868195 2023-01-23 23:56:41.542121: step: 188/77, loss: 1.4129879673419055e-05 2023-01-23 23:56:42.806411: step: 192/77, loss: 0.0021475232206285 2023-01-23 23:56:44.105882: step: 196/77, loss: 7.18229387075553e-07 2023-01-23 23:56:45.433064: step: 200/77, loss: 0.017017874866724014 2023-01-23 23:56:46.719914: step: 204/77, loss: 0.0012315193889662623 2023-01-23 23:56:48.039217: step: 208/77, loss: 0.0026159523986279964 2023-01-23 23:56:49.383629: step: 212/77, loss: 1.3963597666588612e-05 2023-01-23 23:56:50.716071: step: 216/77, loss: 7.60200564400293e-05 2023-01-23 23:56:52.052933: step: 220/77, loss: 0.00015289208386093378 2023-01-23 23:56:53.342148: step: 224/77, loss: 1.761947714840062e-05 2023-01-23 23:56:54.639079: step: 228/77, loss: 9.572540147928521e-05 2023-01-23 23:56:55.989787: step: 232/77, loss: 0.3964194059371948 2023-01-23 23:56:57.286414: step: 236/77, loss: 9.985191718442366e-05 2023-01-23 23:56:58.618245: step: 240/77, loss: 0.11299334466457367 2023-01-23 23:56:59.920184: step: 244/77, loss: 1.639120199570243e-07 2023-01-23 23:57:01.206279: step: 248/77, loss: 0.001752063282765448 2023-01-23 23:57:02.496721: step: 252/77, loss: 0.0011091380147263408 2023-01-23 23:57:03.839037: step: 256/77, loss: 0.003398521803319454 2023-01-23 23:57:05.130218: step: 260/77, loss: 0.0009565073414705694 2023-01-23 23:57:06.454245: step: 264/77, loss: 0.00032057648058980703 2023-01-23 23:57:07.712783: step: 268/77, loss: 0.0014985166490077972 2023-01-23 23:57:09.008030: step: 
272/77, loss: 2.0800737274839776e-06 2023-01-23 23:57:10.322744: step: 276/77, loss: 0.004631507210433483 2023-01-23 23:57:11.660638: step: 280/77, loss: 2.6374616481916746e-06 2023-01-23 23:57:12.994109: step: 284/77, loss: 9.733112165122293e-06 2023-01-23 23:57:14.296463: step: 288/77, loss: 5.6874618167057633e-05 2023-01-23 23:57:15.603271: step: 292/77, loss: 7.215822552097961e-05 2023-01-23 23:57:16.937738: step: 296/77, loss: 0.0013572302414104342 2023-01-23 23:57:18.260940: step: 300/77, loss: 0.016739701852202415 2023-01-23 23:57:19.620957: step: 304/77, loss: 0.001509991241618991 2023-01-23 23:57:20.866580: step: 308/77, loss: 0.0048863752745091915 2023-01-23 23:57:22.156046: step: 312/77, loss: 0.011294625699520111 2023-01-23 23:57:23.442402: step: 316/77, loss: 0.00012890678772237152 2023-01-23 23:57:24.755069: step: 320/77, loss: 9.1008041636087e-06 2023-01-23 23:57:26.117912: step: 324/77, loss: 0.018667250871658325 2023-01-23 23:57:27.417994: step: 328/77, loss: 8.866010148267378e-07 2023-01-23 23:57:28.712499: step: 332/77, loss: 1.8640942016645567e-06 2023-01-23 23:57:30.025307: step: 336/77, loss: 1.7989448679145426e-05 2023-01-23 23:57:31.318480: step: 340/77, loss: 1.5984442143235356e-05 2023-01-23 23:57:32.682541: step: 344/77, loss: 2.678888813534286e-05 2023-01-23 23:57:33.941316: step: 348/77, loss: 0.003408107440918684 2023-01-23 23:57:35.223956: step: 352/77, loss: 9.685746249488147e-08 2023-01-23 23:57:36.527801: step: 356/77, loss: 1.2738772056763992e-05 2023-01-23 23:57:37.794909: step: 360/77, loss: 7.91678667155793e-06 2023-01-23 23:57:39.132425: step: 364/77, loss: 0.002775351284071803 2023-01-23 23:57:40.466130: step: 368/77, loss: 1.750870251271408e-05 2023-01-23 23:57:41.806733: step: 372/77, loss: 0.003882479388266802 2023-01-23 23:57:43.105448: step: 376/77, loss: 1.9464694560156204e-05 2023-01-23 23:57:44.420314: step: 380/77, loss: 0.00039333669701591134 2023-01-23 23:57:45.771601: step: 384/77, loss: 2.0658637367887422e-05 2023-01-23 23:57:47.105834: step: 388/77, loss: 1.4677294757348136e-06 ================================================== Loss: 0.010 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Chinese: {'template': {'p': 0.9705882352941176, 'r': 0.5238095238095238, 'f1': 0.6804123711340205}, 'slot': {'p': 0.6666666666666666, 'r': 0.020618556701030927, 'f1': 0.04}, 'combined': 0.02721649484536082, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Korean: {'template': {'p': 0.9705882352941176, 'r': 0.5238095238095238, 'f1': 0.6804123711340205}, 'slot': {'p': 0.6571428571428571, 'r': 0.019759450171821305, 'f1': 0.038365304420350285}, 'combined': 0.026104227749929057, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Russian: {'template': {'p': 0.9710144927536232, 'r': 0.5317460317460317, 'f1': 0.6871794871794872}, 'slot': {'p': 0.6486486486486487, 'r': 0.020618556701030927, 'f1': 0.03996669442131557}, 'combined': 0.027464292576698905, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 
0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6333333333333333, 'r': 0.01632302405498282, 'f1': 0.031825795644891124}, 'combined': 0.02077051926298157, 'epoch': 21} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 24 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-23 23:59:28.974476: step: 4/77, loss: 9.429901183466427e-06 2023-01-23 23:59:30.326825: step: 8/77, loss: 0.0015411779750138521 2023-01-23 23:59:31.620486: step: 12/77, loss: 0.002688502660021186 2023-01-23 23:59:32.901696: step: 16/77, loss: 9.294860501540825e-05 2023-01-23 23:59:34.222225: step: 20/77, loss: 0.006230478174984455 2023-01-23 23:59:35.487251: step: 24/77, loss: 0.0002460524847265333 2023-01-23 23:59:36.803455: step: 28/77, loss: 0.000616594566963613 2023-01-23 23:59:38.151088: step: 32/77, loss: 0.0003057969734072685 2023-01-23 23:59:39.483534: step: 36/77, loss: 5.055198926129378e-06 2023-01-23 23:59:40.766013: step: 40/77, loss: 0.03458188846707344 2023-01-23 23:59:42.086790: step: 44/77, loss: 7.703703204242629e-07 2023-01-23 23:59:43.401732: step: 48/77, loss: 0.04005847126245499 2023-01-23 23:59:44.709098: step: 52/77, loss: 0.00026904334663413465 2023-01-23 23:59:46.038867: step: 
56/77, loss: 5.124694143887609e-05 2023-01-23 23:59:47.383380: step: 60/77, loss: 6.460304575739428e-05 2023-01-23 23:59:48.660064: step: 64/77, loss: 0.00023380067432299256 2023-01-23 23:59:49.969573: step: 68/77, loss: 1.4930650422684266e-06 2023-01-23 23:59:51.243345: step: 72/77, loss: 0.0010681393323466182 2023-01-23 23:59:52.542293: step: 76/77, loss: 0.0012353757629171014 2023-01-23 23:59:53.789785: step: 80/77, loss: 0.003945427946746349 2023-01-23 23:59:55.044538: step: 84/77, loss: 2.697092895687092e-07 2023-01-23 23:59:56.345553: step: 88/77, loss: 2.790932012430858e-05 2023-01-23 23:59:57.671538: step: 92/77, loss: 0.00035998865496367216 2023-01-23 23:59:59.000607: step: 96/77, loss: 4.127090960537316e-06 2023-01-24 00:00:00.342749: step: 100/77, loss: 0.00021439642296172678 2023-01-24 00:00:01.654803: step: 104/77, loss: 1.5853891454753466e-05 2023-01-24 00:00:03.039400: step: 108/77, loss: 0.06753873825073242 2023-01-24 00:00:04.359150: step: 112/77, loss: 0.03835416957736015 2023-01-24 00:00:05.705505: step: 116/77, loss: 3.778714017244056e-06 2023-01-24 00:00:07.047068: step: 120/77, loss: 1.3321557162271347e-05 2023-01-24 00:00:08.335632: step: 124/77, loss: 3.7472609619726427e-06 2023-01-24 00:00:09.592100: step: 128/77, loss: 0.000342967250617221 2023-01-24 00:00:10.943553: step: 132/77, loss: 0.004019541200250387 2023-01-24 00:00:12.246896: step: 136/77, loss: 0.00020471542666200548 2023-01-24 00:00:13.556860: step: 140/77, loss: 2.682182582702808e-07 2023-01-24 00:00:14.901463: step: 144/77, loss: 0.002711113542318344 2023-01-24 00:00:16.174933: step: 148/77, loss: 8.044051355682313e-06 2023-01-24 00:00:17.474304: step: 152/77, loss: 8.048631571000442e-05 2023-01-24 00:00:18.782102: step: 156/77, loss: 0.0005561854341067374 2023-01-24 00:00:20.071372: step: 160/77, loss: 0.0027107931673526764 2023-01-24 00:00:21.388561: step: 164/77, loss: 4.043736043968238e-05 2023-01-24 00:00:22.715562: step: 168/77, loss: 0.0017298419261351228 2023-01-24 00:00:23.978165: step: 172/77, loss: 0.0002148420171579346 2023-01-24 00:00:25.279783: step: 176/77, loss: 0.00018987305520568043 2023-01-24 00:00:26.614274: step: 180/77, loss: 1.3187175227358239e-06 2023-01-24 00:00:27.935407: step: 184/77, loss: 0.002663626568391919 2023-01-24 00:00:29.202059: step: 188/77, loss: 0.0023871776647865772 2023-01-24 00:00:30.534847: step: 192/77, loss: 2.942380160675384e-05 2023-01-24 00:00:31.860318: step: 196/77, loss: 0.03128264844417572 2023-01-24 00:00:33.145370: step: 200/77, loss: 0.0009219897910952568 2023-01-24 00:00:34.460993: step: 204/77, loss: 0.0002257965534226969 2023-01-24 00:00:35.798551: step: 208/77, loss: 0.0021429036278277636 2023-01-24 00:00:37.074943: step: 212/77, loss: 3.0128776415949687e-06 2023-01-24 00:00:38.384308: step: 216/77, loss: 0.00012688209244515747 2023-01-24 00:00:39.684957: step: 220/77, loss: 0.0001109151853597723 2023-01-24 00:00:40.997873: step: 224/77, loss: 1.207463901664596e-05 2023-01-24 00:00:42.276199: step: 228/77, loss: 4.274935236026067e-06 2023-01-24 00:00:43.577794: step: 232/77, loss: 8.942193744587712e-06 2023-01-24 00:00:44.935699: step: 236/77, loss: 0.00014880349044688046 2023-01-24 00:00:46.288348: step: 240/77, loss: 4.067971701715578e-07 2023-01-24 00:00:47.621804: step: 244/77, loss: 1.275551494472893e-05 2023-01-24 00:00:48.916103: step: 248/77, loss: 7.123775503714569e-06 2023-01-24 00:00:50.233850: step: 252/77, loss: 0.03095679171383381 2023-01-24 00:00:51.521197: step: 256/77, loss: 3.175844540237449e-05 2023-01-24 00:00:52.873389: 
step: 260/77, loss: 1.7672155081527308e-06 2023-01-24 00:00:54.165008: step: 264/77, loss: 0.009948733262717724 2023-01-24 00:00:55.462353: step: 268/77, loss: 0.004043227061629295 2023-01-24 00:00:56.770239: step: 272/77, loss: 0.00035666185431182384 2023-01-24 00:00:58.090519: step: 276/77, loss: 4.450965570867993e-05 2023-01-24 00:00:59.403366: step: 280/77, loss: 0.00424011517316103 2023-01-24 00:01:00.745564: step: 284/77, loss: 2.2053633585983334e-07 2023-01-24 00:01:02.117054: step: 288/77, loss: 3.563959580787923e-06 2023-01-24 00:01:03.478394: step: 292/77, loss: 0.00020834298629779369 2023-01-24 00:01:04.818274: step: 296/77, loss: 4.0977698745336966e-07 2023-01-24 00:01:06.086927: step: 300/77, loss: 0.1088738888502121 2023-01-24 00:01:07.394524: step: 304/77, loss: 0.0008221596363000572 2023-01-24 00:01:08.697289: step: 308/77, loss: 1.533289605504251e-06 2023-01-24 00:01:09.985027: step: 312/77, loss: 0.00010860348993446678 2023-01-24 00:01:11.264503: step: 316/77, loss: 0.0003466178313829005 2023-01-24 00:01:12.549188: step: 320/77, loss: 2.8398906124493806e-06 2023-01-24 00:01:13.809596: step: 324/77, loss: 0.0002615092962514609 2023-01-24 00:01:15.081100: step: 328/77, loss: 3.981458121415926e-06 2023-01-24 00:01:16.357973: step: 332/77, loss: 9.101664545596577e-06 2023-01-24 00:01:17.657597: step: 336/77, loss: 3.844280854536919e-06 2023-01-24 00:01:18.976293: step: 340/77, loss: 4.366013399703661e-07 2023-01-24 00:01:20.347907: step: 344/77, loss: 1.9624180822575e-06 2023-01-24 00:01:21.688338: step: 348/77, loss: 0.00010418868623673916 2023-01-24 00:01:23.006981: step: 352/77, loss: 1.4759563782718033e-05 2023-01-24 00:01:24.336312: step: 356/77, loss: 2.355390097363852e-05 2023-01-24 00:01:25.648657: step: 360/77, loss: 1.1948211067647208e-05 2023-01-24 00:01:26.947964: step: 364/77, loss: 0.0030993595719337463 2023-01-24 00:01:28.260827: step: 368/77, loss: 4.706936579168541e-06 2023-01-24 00:01:29.543542: step: 372/77, loss: 9.344216778117698e-06 2023-01-24 00:01:30.842477: step: 376/77, loss: 2.4437713364022784e-07 2023-01-24 00:01:32.174487: step: 380/77, loss: 1.954923936864361e-05 2023-01-24 00:01:33.471537: step: 384/77, loss: 8.272416380350478e-06 2023-01-24 00:01:34.729895: step: 388/77, loss: 1.017559043248184e-05 ================================================== Loss: 0.004 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Chinese: {'template': {'p': 0.9855072463768116, 'r': 0.5396825396825397, 'f1': 0.6974358974358974}, 'slot': {'p': 0.6764705882352942, 'r': 0.019759450171821305, 'f1': 0.038397328881469114}, 'combined': 0.026779675527588715, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Korean: {'template': {'p': 0.9714285714285714, 'r': 0.5396825396825397, 'f1': 0.6938775510204082}, 'slot': {'p': 0.6388888888888888, 'r': 0.019759450171821305, 'f1': 0.03833333333333333}, 'combined': 0.02659863945578231, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Russian: {'template': {'p': 0.9710144927536232, 'r': 0.5317460317460317, 
'f1': 0.6871794871794872}, 'slot': {'p': 0.6, 'r': 0.01804123711340206, 'f1': 0.035029190992493735}, 'combined': 0.024071341502534156, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 24} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6333333333333333, 'r': 0.01632302405498282, 'f1': 0.031825795644891124}, 'combined': 0.02077051926298157, 'epoch': 21} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 25 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:03:16.791440: step: 4/77, loss: 1.0131721865036525e-05 2023-01-24 00:03:18.128780: step: 8/77, loss: 2.1794825443066657e-05 2023-01-24 00:03:19.399363: step: 12/77, loss: 0.003990492783486843 2023-01-24 00:03:20.675362: step: 16/77, loss: 2.2419793822336942e-05 2023-01-24 00:03:21.980591: step: 20/77, loss: 0.00011453872139099985 2023-01-24 00:03:23.299278: step: 24/77, loss: 0.0009024697938002646 2023-01-24 00:03:24.624207: step: 28/77, loss: 0.0009852326475083828 2023-01-24 00:03:25.938503: step: 32/77, loss: 4.193978020339273e-05 2023-01-24 00:03:27.260318: step: 36/77, loss: 9.90917442322825e-07 2023-01-24 00:03:28.553374: step: 
40/77, loss: 1.4200351188264904e-06 2023-01-24 00:03:29.831251: step: 44/77, loss: 1.7364271116093732e-05 2023-01-24 00:03:31.086403: step: 48/77, loss: 0.0018864780431613326 2023-01-24 00:03:32.383367: step: 52/77, loss: 1.4304744127002778e-06 2023-01-24 00:03:33.677618: step: 56/77, loss: 8.415815500484314e-06 2023-01-24 00:03:34.996390: step: 60/77, loss: 0.00016998105274979025 2023-01-24 00:03:36.308057: step: 64/77, loss: 2.388111170148477e-05 2023-01-24 00:03:37.580373: step: 68/77, loss: 3.4421458394717774e-07 2023-01-24 00:03:38.894016: step: 72/77, loss: 5.829046585859032e-06 2023-01-24 00:03:40.175286: step: 76/77, loss: 1.5549348972854204e-05 2023-01-24 00:03:41.464015: step: 80/77, loss: 4.696151154348627e-05 2023-01-24 00:03:42.777730: step: 84/77, loss: 0.00034041781327687204 2023-01-24 00:03:44.108051: step: 88/77, loss: 4.1603518184274435e-05 2023-01-24 00:03:45.437242: step: 92/77, loss: 1.6435512861789903e-06 2023-01-24 00:03:46.757695: step: 96/77, loss: 3.1080940971150994e-05 2023-01-24 00:03:48.087023: step: 100/77, loss: 0.002379967365413904 2023-01-24 00:03:49.395022: step: 104/77, loss: 6.448364729294553e-05 2023-01-24 00:03:50.717764: step: 108/77, loss: 2.125439641531557e-05 2023-01-24 00:03:51.999807: step: 112/77, loss: 0.1124541163444519 2023-01-24 00:03:53.287982: step: 116/77, loss: 2.987526841025101e-06 2023-01-24 00:03:54.562474: step: 120/77, loss: 4.3869185901712626e-05 2023-01-24 00:03:55.837754: step: 124/77, loss: 8.517784590367228e-05 2023-01-24 00:03:57.168865: step: 128/77, loss: 0.0001356957363896072 2023-01-24 00:03:58.506461: step: 132/77, loss: 6.780152489227476e-06 2023-01-24 00:03:59.832967: step: 136/77, loss: 0.00010379239392932504 2023-01-24 00:04:01.121611: step: 140/77, loss: 2.183364995289594e-05 2023-01-24 00:04:02.430492: step: 144/77, loss: 2.7572094040806405e-05 2023-01-24 00:04:03.701304: step: 148/77, loss: 4.101280137547292e-05 2023-01-24 00:04:05.036353: step: 152/77, loss: 5.097959365230054e-05 2023-01-24 00:04:06.348176: step: 156/77, loss: 1.3338012649910524e-05 2023-01-24 00:04:07.645032: step: 160/77, loss: 1.0117785222973907e-06 2023-01-24 00:04:08.964638: step: 164/77, loss: 0.03273176774382591 2023-01-24 00:04:10.257122: step: 168/77, loss: 0.003630247199907899 2023-01-24 00:04:11.554396: step: 172/77, loss: 2.6672981334741053e-07 2023-01-24 00:04:12.888784: step: 176/77, loss: 0.003499385202303529 2023-01-24 00:04:14.189839: step: 180/77, loss: 0.00023101118858903646 2023-01-24 00:04:15.514368: step: 184/77, loss: 0.006367899943143129 2023-01-24 00:04:16.838095: step: 188/77, loss: 0.011398566886782646 2023-01-24 00:04:18.169118: step: 192/77, loss: 2.0384045456012245e-06 2023-01-24 00:04:19.473105: step: 196/77, loss: 6.287941232585581e-06 2023-01-24 00:04:20.729232: step: 200/77, loss: 0.0001234139926964417 2023-01-24 00:04:22.010158: step: 204/77, loss: 0.034921444952487946 2023-01-24 00:04:23.270113: step: 208/77, loss: 5.707017862732755e-07 2023-01-24 00:04:24.553397: step: 212/77, loss: 1.282813445868669e-05 2023-01-24 00:04:25.850894: step: 216/77, loss: 7.301513278434868e-07 2023-01-24 00:04:27.205643: step: 220/77, loss: 0.029432687908411026 2023-01-24 00:04:28.476190: step: 224/77, loss: 3.7668391996703576e-06 2023-01-24 00:04:29.805564: step: 228/77, loss: 0.010214095935225487 2023-01-24 00:04:31.074844: step: 232/77, loss: 6.202467920957133e-05 2023-01-24 00:04:32.448903: step: 236/77, loss: 0.002276056446135044 2023-01-24 00:04:33.751046: step: 240/77, loss: 1.1458730568847386e-06 2023-01-24 00:04:35.074559: 
step: 244/77, loss: 1.3854609278496355e-05 2023-01-24 00:04:36.353884: step: 248/77, loss: 2.090712769131642e-05 2023-01-24 00:04:37.649699: step: 252/77, loss: 0.012720501981675625 2023-01-24 00:04:38.969572: step: 256/77, loss: 0.010555337183177471 2023-01-24 00:04:40.285030: step: 260/77, loss: 0.0005867365980520844 2023-01-24 00:04:41.621732: step: 264/77, loss: 6.630651569139445e-06 2023-01-24 00:04:42.952446: step: 268/77, loss: 2.6292429538443685e-05 2023-01-24 00:04:44.283418: step: 272/77, loss: 0.0002204696647822857 2023-01-24 00:04:45.582379: step: 276/77, loss: 2.4480957563355332e-06 2023-01-24 00:04:46.931592: step: 280/77, loss: 1.9012670691154199e-06 2023-01-24 00:04:48.210125: step: 284/77, loss: 4.039056420879206e-06 2023-01-24 00:04:49.516127: step: 288/77, loss: 5.880193202756345e-05 2023-01-24 00:04:50.788753: step: 292/77, loss: 2.8312197031254982e-08 2023-01-24 00:04:52.196416: step: 296/77, loss: 5.5917907957336865e-06 2023-01-24 00:04:53.503990: step: 300/77, loss: 1.966946570064465e-07 2023-01-24 00:04:54.805340: step: 304/77, loss: 3.297445118732867e-06 2023-01-24 00:04:56.097795: step: 308/77, loss: 7.820551218173932e-06 2023-01-24 00:04:57.411294: step: 312/77, loss: 1.2486799505495583e-06 2023-01-24 00:04:58.686309: step: 316/77, loss: 3.5506375297700288e-06 2023-01-24 00:05:00.001387: step: 320/77, loss: 2.61664463323541e-05 2023-01-24 00:05:01.293102: step: 324/77, loss: 6.271308848226909e-06 2023-01-24 00:05:02.642776: step: 328/77, loss: 9.149007382802665e-07 2023-01-24 00:05:03.986493: step: 332/77, loss: 0.0011777568142861128 2023-01-24 00:05:05.316678: step: 336/77, loss: 5.036566221860994e-07 2023-01-24 00:05:06.664579: step: 340/77, loss: 0.0036230036057531834 2023-01-24 00:05:08.033556: step: 344/77, loss: 0.010900352150201797 2023-01-24 00:05:09.290707: step: 348/77, loss: 0.001082171918824315 2023-01-24 00:05:10.571359: step: 352/77, loss: 2.9057139272481436e-07 2023-01-24 00:05:11.950965: step: 356/77, loss: 2.0563533098538755e-07 2023-01-24 00:05:13.222472: step: 360/77, loss: 1.3693969549422036e-06 2023-01-24 00:05:14.539271: step: 364/77, loss: 0.006990964524447918 2023-01-24 00:05:15.882625: step: 368/77, loss: 0.004334151744842529 2023-01-24 00:05:17.188189: step: 372/77, loss: 6.668072455795482e-05 2023-01-24 00:05:18.551208: step: 376/77, loss: 0.00010231477790512145 2023-01-24 00:05:19.883007: step: 380/77, loss: 6.392461727955379e-06 2023-01-24 00:05:21.210560: step: 384/77, loss: 1.6689237725131534e-07 2023-01-24 00:05:22.535814: step: 388/77, loss: 0.011087899096310139 ================================================== Loss: 0.003 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Chinese: {'template': {'p': 0.9848484848484849, 'r': 0.5158730158730159, 'f1': 0.6770833333333334}, 'slot': {'p': 0.6571428571428571, 'r': 0.019759450171821305, 'f1': 0.038365304420350285}, 'combined': 0.02597650820127884, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Korean: {'template': {'p': 0.9848484848484849, 'r': 0.5158730158730159, 'f1': 0.6770833333333334}, 'slot': {'p': 0.6388888888888888, 'r': 0.019759450171821305, 'f1': 0.03833333333333333}, 'combined': 0.02595486111111111, 'epoch': 25} Dev 
Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Russian: {'template': {'p': 0.9848484848484849, 'r': 0.5158730158730159, 'f1': 0.6770833333333334}, 'slot': {'p': 0.6176470588235294, 'r': 0.01804123711340206, 'f1': 0.035058430717863104}, 'combined': 0.023737479131886476, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6333333333333333, 'r': 0.01632302405498282, 'f1': 0.031825795644891124}, 'combined': 0.02077051926298157, 'epoch': 21} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 26 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:07:03.740787: step: 4/77, loss: 0.0025950754061341286 2023-01-24 00:07:05.096031: step: 8/77, loss: 7.873056347307283e-06 2023-01-24 00:07:06.411556: step: 12/77, loss: 0.0008838302455842495 2023-01-24 00:07:07.711637: step: 16/77, loss: 0.0028116789180785418 2023-01-24 00:07:09.015072: step: 20/77, loss: 3.9636941551179916e-07 2023-01-24 
00:07:10.306620: step: 24/77, loss: 3.18428355967626e-05 2023-01-24 00:07:11.587642: step: 28/77, loss: 8.923166751628742e-05 2023-01-24 00:07:12.941060: step: 32/77, loss: 1.8863978766603395e-05 2023-01-24 00:07:14.223517: step: 36/77, loss: 0.004659530706703663 2023-01-24 00:07:15.525067: step: 40/77, loss: 9.939018354998552e-07 2023-01-24 00:07:16.802970: step: 44/77, loss: 0.0086531862616539 2023-01-24 00:07:18.161997: step: 48/77, loss: 0.000130360946059227 2023-01-24 00:07:19.425269: step: 52/77, loss: 0.001125822658650577 2023-01-24 00:07:20.771973: step: 56/77, loss: 8.256652654381469e-05 2023-01-24 00:07:22.075610: step: 60/77, loss: 3.7252627294037666e-07 2023-01-24 00:07:23.354088: step: 64/77, loss: 0.0012953771511092782 2023-01-24 00:07:24.671021: step: 68/77, loss: 2.5371762603754178e-05 2023-01-24 00:07:26.022417: step: 72/77, loss: 0.0005739459302276373 2023-01-24 00:07:27.295365: step: 76/77, loss: 0.0006838550325483084 2023-01-24 00:07:28.625598: step: 80/77, loss: 0.0941888764500618 2023-01-24 00:07:29.949267: step: 84/77, loss: 3.7338620586524485e-06 2023-01-24 00:07:31.291988: step: 88/77, loss: 0.00012075306585757062 2023-01-24 00:07:32.681855: step: 92/77, loss: 0.00017766923701856285 2023-01-24 00:07:33.995322: step: 96/77, loss: 3.5475077311275527e-06 2023-01-24 00:07:35.341824: step: 100/77, loss: 2.7978778234682977e-05 2023-01-24 00:07:36.673028: step: 104/77, loss: 0.00035309020313434303 2023-01-24 00:07:37.982479: step: 108/77, loss: 0.000111517590994481 2023-01-24 00:07:39.293768: step: 112/77, loss: 2.7566898097575177e-07 2023-01-24 00:07:40.597584: step: 116/77, loss: 0.002493783365935087 2023-01-24 00:07:41.913207: step: 120/77, loss: 2.5427108994335867e-05 2023-01-24 00:07:43.271664: step: 124/77, loss: 4.267096301191486e-06 2023-01-24 00:07:44.545342: step: 128/77, loss: 0.0004146482970099896 2023-01-24 00:07:45.842689: step: 132/77, loss: 0.0007785240886732936 2023-01-24 00:07:47.158008: step: 136/77, loss: 2.294766545674065e-07 2023-01-24 00:07:48.459853: step: 140/77, loss: 0.00033424387220293283 2023-01-24 00:07:49.744316: step: 144/77, loss: 0.0014177103294059634 2023-01-24 00:07:51.031736: step: 148/77, loss: 0.0001023170625558123 2023-01-24 00:07:52.347171: step: 152/77, loss: 0.0001036529429256916 2023-01-24 00:07:53.693873: step: 156/77, loss: 0.008335943333804607 2023-01-24 00:07:55.054523: step: 160/77, loss: 0.0015858053229749203 2023-01-24 00:07:56.332409: step: 164/77, loss: 0.0016299583949148655 2023-01-24 00:07:57.646815: step: 168/77, loss: 0.00011356234608683735 2023-01-24 00:07:58.961394: step: 172/77, loss: 0.00010723163723014295 2023-01-24 00:08:00.240986: step: 176/77, loss: 6.407495334315172e-08 2023-01-24 00:08:01.614382: step: 180/77, loss: 4.3071326217614114e-05 2023-01-24 00:08:03.047342: step: 184/77, loss: 2.104633858834859e-05 2023-01-24 00:08:04.370937: step: 188/77, loss: 2.9412876756396145e-06 2023-01-24 00:08:05.721941: step: 192/77, loss: 1.1890861060237512e-06 2023-01-24 00:08:07.065645: step: 196/77, loss: 5.061383490101434e-05 2023-01-24 00:08:08.363204: step: 200/77, loss: 0.002078942721709609 2023-01-24 00:08:09.693841: step: 204/77, loss: 9.075264642888214e-06 2023-01-24 00:08:11.012189: step: 208/77, loss: 0.1539250761270523 2023-01-24 00:08:12.329618: step: 212/77, loss: 5.708212484023534e-05 2023-01-24 00:08:13.647151: step: 216/77, loss: 1.096989944926463e-05 2023-01-24 00:08:15.017879: step: 220/77, loss: 0.00020207525813020766 2023-01-24 00:08:16.278317: step: 224/77, loss: 7.688929599680705e-07 2023-01-24 
00:08:17.564701: step: 228/77, loss: 0.0008981174323707819 2023-01-24 00:08:18.910457: step: 232/77, loss: 0.00035968711017630994 2023-01-24 00:08:20.202796: step: 236/77, loss: 1.2278413805688615e-06 2023-01-24 00:08:21.523128: step: 240/77, loss: 1.5705521718700766e-06 2023-01-24 00:08:22.846064: step: 244/77, loss: 1.9653625713544898e-06 2023-01-24 00:08:24.218980: step: 248/77, loss: 3.0335447718243813e-06 2023-01-24 00:08:25.571809: step: 252/77, loss: 3.3941450965357944e-06 2023-01-24 00:08:26.872811: step: 256/77, loss: 3.6431715670914855e-06 2023-01-24 00:08:28.212454: step: 260/77, loss: 1.8580437881610123e-06 2023-01-24 00:08:29.560893: step: 264/77, loss: 0.0007157810614444315 2023-01-24 00:08:30.883662: step: 268/77, loss: 0.0003576005110517144 2023-01-24 00:08:32.189621: step: 272/77, loss: 2.93079983748612e-06 2023-01-24 00:08:33.534906: step: 276/77, loss: 8.270012017419504e-07 2023-01-24 00:08:34.841739: step: 280/77, loss: 7.562554674223065e-05 2023-01-24 00:08:36.191248: step: 284/77, loss: 4.782815267390106e-06 2023-01-24 00:08:37.524211: step: 288/77, loss: 0.02490079402923584 2023-01-24 00:08:38.848804: step: 292/77, loss: 8.150669827955426e-07 2023-01-24 00:08:40.110756: step: 296/77, loss: 3.5999412375531392e-06 2023-01-24 00:08:41.456871: step: 300/77, loss: 4.378571247798391e-05 2023-01-24 00:08:42.773921: step: 304/77, loss: 0.00010184153506997973 2023-01-24 00:08:44.044823: step: 308/77, loss: 0.00016333050734829158 2023-01-24 00:08:45.399580: step: 312/77, loss: 0.00014269737584982067 2023-01-24 00:08:46.766916: step: 316/77, loss: 0.16972936689853668 2023-01-24 00:08:48.087305: step: 320/77, loss: 0.0005670539103448391 2023-01-24 00:08:49.397224: step: 324/77, loss: 0.003000626340508461 2023-01-24 00:08:50.741180: step: 328/77, loss: 0.00014308324898593128 2023-01-24 00:08:52.026447: step: 332/77, loss: 0.0004799831658601761 2023-01-24 00:08:53.321078: step: 336/77, loss: 0.035126760601997375 2023-01-24 00:08:54.676167: step: 340/77, loss: 1.7821416804508772e-06 2023-01-24 00:08:56.005782: step: 344/77, loss: 0.0005418871296569705 2023-01-24 00:08:57.368168: step: 348/77, loss: 0.00010020958143286407 2023-01-24 00:08:58.684524: step: 352/77, loss: 0.019325532019138336 2023-01-24 00:09:00.018134: step: 356/77, loss: 0.00025178532814607024 2023-01-24 00:09:01.286060: step: 360/77, loss: 1.534817783976905e-07 2023-01-24 00:09:02.640250: step: 364/77, loss: 0.0035209807101637125 2023-01-24 00:09:03.953468: step: 368/77, loss: 0.00038489673170261085 2023-01-24 00:09:05.261314: step: 372/77, loss: 0.02655796706676483 2023-01-24 00:09:06.575896: step: 376/77, loss: 0.00249669561162591 2023-01-24 00:09:07.891175: step: 380/77, loss: 1.6391267010362753e-08 2023-01-24 00:09:09.231960: step: 384/77, loss: 2.1010515638408833e-07 2023-01-24 00:09:10.578460: step: 388/77, loss: 7.659033371965052e-07 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 26} Test Chinese: {'template': {'p': 0.9848484848484849, 'r': 0.5158730158730159, 'f1': 0.6770833333333334}, 'slot': {'p': 0.6071428571428571, 'r': 0.014604810996563574, 'f1': 0.02852348993288591}, 'combined': 0.01931277964205817, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 
0.06371681415929203}, 'combined': 0.0443247402847249, 'epoch': 26} Test Korean: {'template': {'p': 0.9848484848484849, 'r': 0.5158730158730159, 'f1': 0.6770833333333334}, 'slot': {'p': 0.6206896551724138, 'r': 0.015463917525773196, 'f1': 0.03017602682313495}, 'combined': 0.02043168482816429, 'epoch': 26} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.04756215508903682, 'epoch': 26} Test Russian: {'template': {'p': 0.9850746268656716, 'r': 0.5238095238095238, 'f1': 0.6839378238341969}, 'slot': {'p': 0.5862068965517241, 'r': 0.014604810996563574, 'f1': 0.02849958088851635}, 'combined': 0.01949194133307854, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6333333333333333, 'r': 0.01632302405498282, 'f1': 0.031825795644891124}, 'combined': 0.02077051926298157, 'epoch': 21} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 27 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:10:51.594922: step: 4/77, loss: 4.437868847162463e-05 2023-01-24 00:10:52.889970: step: 8/77, loss: 
0.005772527772933245 2023-01-24 00:10:54.226230: step: 12/77, loss: 1.5690127838752232e-05 2023-01-24 00:10:55.557715: step: 16/77, loss: 0.0006909964722581208 2023-01-24 00:10:56.931593: step: 20/77, loss: 0.06149635836482048 2023-01-24 00:10:58.199149: step: 24/77, loss: 0.005781150888651609 2023-01-24 00:10:59.482088: step: 28/77, loss: 3.4421262284922705e-07 2023-01-24 00:11:00.771034: step: 32/77, loss: 0.13293160498142242 2023-01-24 00:11:02.107564: step: 36/77, loss: 0.02070668153464794 2023-01-24 00:11:03.380029: step: 40/77, loss: 2.3008822608971968e-05 2023-01-24 00:11:04.659972: step: 44/77, loss: 7.152538472610104e-08 2023-01-24 00:11:05.989165: step: 48/77, loss: 0.0042272391729056835 2023-01-24 00:11:07.283371: step: 52/77, loss: 4.4107048324804055e-07 2023-01-24 00:11:08.621260: step: 56/77, loss: 1.2517935829237103e-05 2023-01-24 00:11:09.887759: step: 60/77, loss: 0.009791519492864609 2023-01-24 00:11:11.178793: step: 64/77, loss: 9.5928437076509e-05 2023-01-24 00:11:12.449779: step: 68/77, loss: 2.2761072614230216e-05 2023-01-24 00:11:13.747146: step: 72/77, loss: 0.0007864056387916207 2023-01-24 00:11:15.037935: step: 76/77, loss: 0.0062559861689805984 2023-01-24 00:11:16.359835: step: 80/77, loss: 0.01806912012398243 2023-01-24 00:11:17.637819: step: 84/77, loss: 0.00015917871496640146 2023-01-24 00:11:19.005609: step: 88/77, loss: 3.1738213692733552e-06 2023-01-24 00:11:20.275055: step: 92/77, loss: 0.0005329230916686356 2023-01-24 00:11:21.543214: step: 96/77, loss: 0.0007474389858543873 2023-01-24 00:11:22.830634: step: 100/77, loss: 0.00035668083000928164 2023-01-24 00:11:24.148234: step: 104/77, loss: 1.0937268370980746e-06 2023-01-24 00:11:25.438142: step: 108/77, loss: 1.0261518582410645e-05 2023-01-24 00:11:26.745154: step: 112/77, loss: 0.0004404282954055816 2023-01-24 00:11:28.017463: step: 116/77, loss: 9.961081559595186e-06 2023-01-24 00:11:29.333275: step: 120/77, loss: 3.652732993941754e-05 2023-01-24 00:11:30.663412: step: 124/77, loss: 9.344317368231714e-05 2023-01-24 00:11:31.909828: step: 128/77, loss: 3.968001237808494e-06 2023-01-24 00:11:33.288450: step: 132/77, loss: 7.186968286987394e-05 2023-01-24 00:11:34.565780: step: 136/77, loss: 0.00017383920203428715 2023-01-24 00:11:35.863181: step: 140/77, loss: 0.0001228848414029926 2023-01-24 00:11:37.145899: step: 144/77, loss: 0.001438188599422574 2023-01-24 00:11:38.402989: step: 148/77, loss: 3.014988760696724e-05 2023-01-24 00:11:39.735290: step: 152/77, loss: 1.8148712115362287e-06 2023-01-24 00:11:41.093596: step: 156/77, loss: 0.005394787061959505 2023-01-24 00:11:42.376386: step: 160/77, loss: 0.0022285841405391693 2023-01-24 00:11:43.664832: step: 164/77, loss: 4.7574019845342264e-05 2023-01-24 00:11:44.913752: step: 168/77, loss: 2.1606575728583266e-07 2023-01-24 00:11:46.241326: step: 172/77, loss: 0.0009196995524689555 2023-01-24 00:11:47.514792: step: 176/77, loss: 0.001570590422488749 2023-01-24 00:11:48.817262: step: 180/77, loss: 5.289816158438043e-07 2023-01-24 00:11:50.110032: step: 184/77, loss: 2.422596480755601e-05 2023-01-24 00:11:51.329810: step: 188/77, loss: 6.407492492144229e-08 2023-01-24 00:11:52.642078: step: 192/77, loss: 4.10639904657728e-06 2023-01-24 00:11:53.997984: step: 196/77, loss: 7.424390787491575e-06 2023-01-24 00:11:55.360734: step: 200/77, loss: 8.687628906045575e-06 2023-01-24 00:11:56.664593: step: 204/77, loss: 4.291505888431857e-07 2023-01-24 00:11:57.947528: step: 208/77, loss: 0.00012478455028031021 2023-01-24 00:11:59.221847: step: 212/77, loss: 
0.008036668412387371 2023-01-24 00:12:00.520223: step: 216/77, loss: 8.950755727710202e-05 2023-01-24 00:12:01.792617: step: 220/77, loss: 0.0023669220972806215 2023-01-24 00:12:03.092477: step: 224/77, loss: 1.0340979770262493e-06 2023-01-24 00:12:04.366029: step: 228/77, loss: 0.08163799345493317 2023-01-24 00:12:05.614368: step: 232/77, loss: 0.00021753522742073983 2023-01-24 00:12:06.986256: step: 236/77, loss: 9.374257388117258e-06 2023-01-24 00:12:08.332031: step: 240/77, loss: 0.00026272039394825697 2023-01-24 00:12:09.578779: step: 244/77, loss: 7.570067828055471e-05 2023-01-24 00:12:10.911391: step: 248/77, loss: 0.00039587743231095374 2023-01-24 00:12:12.202701: step: 252/77, loss: 0.0011752621503546834 2023-01-24 00:12:13.505052: step: 256/77, loss: 0.0003574644506443292 2023-01-24 00:12:14.855924: step: 260/77, loss: 1.4300309885584284e-05 2023-01-24 00:12:16.157499: step: 264/77, loss: 0.0002528098411858082 2023-01-24 00:12:17.447615: step: 268/77, loss: 0.0002186048513976857 2023-01-24 00:12:18.790529: step: 272/77, loss: 0.0008742262725718319 2023-01-24 00:12:20.129873: step: 276/77, loss: 0.01561832893639803 2023-01-24 00:12:21.481331: step: 280/77, loss: 1.1299137440801132e-05 2023-01-24 00:12:22.768851: step: 284/77, loss: 0.017644179984927177 2023-01-24 00:12:24.080374: step: 288/77, loss: 0.03774954751133919 2023-01-24 00:12:25.412556: step: 292/77, loss: 2.2633644221059512e-06 2023-01-24 00:12:26.720832: step: 296/77, loss: 0.05433797463774681 2023-01-24 00:12:28.021946: step: 300/77, loss: 5.3644168218625055e-08 2023-01-24 00:12:29.285690: step: 304/77, loss: 0.0002883031265810132 2023-01-24 00:12:30.597274: step: 308/77, loss: 0.00020126851450186223 2023-01-24 00:12:31.898063: step: 312/77, loss: 6.914063988006092e-07 2023-01-24 00:12:33.212153: step: 316/77, loss: 8.880998620952596e-07 2023-01-24 00:12:34.509791: step: 320/77, loss: 6.634901183133479e-06 2023-01-24 00:12:35.837370: step: 324/77, loss: 0.000134270143462345 2023-01-24 00:12:37.123888: step: 328/77, loss: 0.0009469084907323122 2023-01-24 00:12:38.445953: step: 332/77, loss: 2.5419665234949207e-06 2023-01-24 00:12:39.700907: step: 336/77, loss: 2.7366057111066766e-05 2023-01-24 00:12:41.018055: step: 340/77, loss: 0.00013289826165419072 2023-01-24 00:12:42.370429: step: 344/77, loss: 1.9423972844379023e-05 2023-01-24 00:12:43.652840: step: 348/77, loss: 0.004998302552849054 2023-01-24 00:12:44.922889: step: 352/77, loss: 7.904337508080062e-06 2023-01-24 00:12:46.254199: step: 356/77, loss: 0.0055083585903048515 2023-01-24 00:12:47.516108: step: 360/77, loss: 4.9315560318063945e-05 2023-01-24 00:12:48.820720: step: 364/77, loss: 1.777686293280567e-06 2023-01-24 00:12:50.095657: step: 368/77, loss: 4.1571020119590685e-06 2023-01-24 00:12:51.413348: step: 372/77, loss: 0.00030079163843765855 2023-01-24 00:12:52.735690: step: 376/77, loss: 8.303288268507458e-06 2023-01-24 00:12:54.045258: step: 380/77, loss: 0.00270866765640676 2023-01-24 00:12:55.366545: step: 384/77, loss: 8.264104690169916e-05 2023-01-24 00:12:56.689096: step: 388/77, loss: 0.0020972429774701595 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 27} Test Chinese: {'template': {'p': 0.9577464788732394, 'r': 0.5396825396825397, 'f1': 0.6903553299492384}, 'slot': {'p': 0.6190476190476191, 
'r': 0.022336769759450172, 'f1': 0.04311774461028192}, 'combined': 0.029766564807098172, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 27} Test Korean: {'template': {'p': 0.9444444444444444, 'r': 0.5396825396825397, 'f1': 0.6868686868686867}, 'slot': {'p': 0.6190476190476191, 'r': 0.022336769759450172, 'f1': 0.04311774461028192}, 'combined': 0.02961622862120374, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 27} Test Russian: {'template': {'p': 0.9444444444444444, 'r': 0.5396825396825397, 'f1': 0.6868686868686867}, 'slot': {'p': 0.6190476190476191, 'r': 0.022336769759450172, 'f1': 0.04311774461028192}, 'combined': 0.02961622862120374, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6333333333333333, 'r': 0.01632302405498282, 'f1': 0.031825795644891124}, 'combined': 0.02077051926298157, 'epoch': 21} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 28 
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:14:37.144386: step: 4/77, loss: 8.23704685899429e-05 2023-01-24 00:14:38.493099: step: 8/77, loss: 0.03628441318869591 2023-01-24 00:14:39.812734: step: 12/77, loss: 0.0022851990070194006 2023-01-24 00:14:41.126110: step: 16/77, loss: 1.4841228903605952e-06 2023-01-24 00:14:42.415926: step: 20/77, loss: 0.0012596538290381432 2023-01-24 00:14:43.681993: step: 24/77, loss: 0.0032123939599841833 2023-01-24 00:14:44.940525: step: 28/77, loss: 2.28930566663621e-05 2023-01-24 00:14:46.303170: step: 32/77, loss: 1.264566344616469e-05 2023-01-24 00:14:47.622322: step: 36/77, loss: 0.003975663799792528 2023-01-24 00:14:48.908787: step: 40/77, loss: 4.4445740059018135e-05 2023-01-24 00:14:50.228273: step: 44/77, loss: 0.00046154105802997947 2023-01-24 00:14:51.562141: step: 48/77, loss: 3.884429588651983e-06 2023-01-24 00:14:52.868528: step: 52/77, loss: 0.00010425122309243307 2023-01-24 00:14:54.183817: step: 56/77, loss: 1.2432614312274382e-05 2023-01-24 00:14:55.467931: step: 60/77, loss: 5.903623241465539e-05 2023-01-24 00:14:56.757567: step: 64/77, loss: 0.04321976378560066 2023-01-24 00:14:58.073904: step: 68/77, loss: 0.0008174843387678266 2023-01-24 00:14:59.348653: step: 72/77, loss: 0.06357888877391815 2023-01-24 00:15:00.608232: step: 76/77, loss: 7.065344561851816e-06 2023-01-24 00:15:01.929072: step: 80/77, loss: 5.1889550377381966e-05 2023-01-24 00:15:03.266478: step: 84/77, loss: 9.714143379824236e-05 2023-01-24 00:15:04.588451: step: 88/77, loss: 0.04682736471295357 2023-01-24 00:15:05.941371: step: 92/77, loss: 1.6703713754395721e-06 2023-01-24 00:15:07.248922: step: 96/77, loss: 0.0002513027866370976 2023-01-24 00:15:08.559002: step: 100/77, loss: 5.727513325837208e-06 2023-01-24 00:15:09.855085: step: 104/77, loss: 3.5202683648094535e-05 2023-01-24 00:15:11.187662: step: 108/77, loss: 5.169656651560217e-05 2023-01-24 00:15:12.523157: step: 112/77, loss: 2.1099415334902005e-06 2023-01-24 00:15:13.835499: step: 116/77, loss: 0.0002199725859099999 2023-01-24 00:15:15.173687: step: 120/77, loss: 3.053050022572279e-06 2023-01-24 00:15:16.491289: step: 124/77, loss: 4.4143645936856046e-05 2023-01-24 00:15:17.792224: step: 128/77, loss: 0.00018353867926634848 2023-01-24 00:15:19.105848: step: 132/77, loss: 0.002969453576952219 2023-01-24 00:15:20.405803: step: 136/77, loss: 0.0013011035043746233 2023-01-24 00:15:21.713970: step: 140/77, loss: 0.00010733507224358618 2023-01-24 00:15:22.991493: step: 144/77, loss: 5.396897904574871e-06 2023-01-24 00:15:24.337254: step: 148/77, loss: 5.68100149394013e-05 2023-01-24 00:15:25.676332: step: 152/77, loss: 5.4925600124988705e-05 2023-01-24 00:15:26.984878: step: 156/77, loss: 0.007555659394711256 2023-01-24 00:15:28.330470: step: 160/77, loss: 0.0002406742423772812 2023-01-24 00:15:29.599835: step: 164/77, loss: 5.572999270953005e-07 2023-01-24 00:15:30.887628: step: 168/77, loss: 1.873036808319739e-06 2023-01-24 00:15:32.223553: step: 172/77, loss: 0.006020023487508297 2023-01-24 00:15:33.514166: step: 176/77, loss: 0.0025768810883164406 2023-01-24 00:15:34.790135: step: 180/77, loss: 0.010427961125969887 2023-01-24 00:15:36.103840: step: 184/77, loss: 2.0492634575930424e-05 2023-01-24 00:15:37.420897: step: 188/77, loss: 2.3690738089499064e-05 2023-01-24 00:15:38.753817: step: 192/77, loss: 
2.4855458832462318e-05 2023-01-24 00:15:40.066433: step: 196/77, loss: 0.019199082627892494 2023-01-24 00:15:41.337202: step: 200/77, loss: 7.696493412368e-06 2023-01-24 00:15:42.611927: step: 204/77, loss: 0.00044914853060618043 2023-01-24 00:15:43.950710: step: 208/77, loss: 0.0013580780941992998 2023-01-24 00:15:45.269838: step: 212/77, loss: 0.00012708050780929625 2023-01-24 00:15:46.613360: step: 216/77, loss: 3.2884518077480607e-06 2023-01-24 00:15:47.885269: step: 220/77, loss: 5.277245691104326e-06 2023-01-24 00:15:49.159084: step: 224/77, loss: 0.00012476358097046614 2023-01-24 00:15:50.434838: step: 228/77, loss: 7.499421917600557e-05 2023-01-24 00:15:51.717518: step: 232/77, loss: 0.025630852207541466 2023-01-24 00:15:53.024930: step: 236/77, loss: 0.003062444506213069 2023-01-24 00:15:54.344565: step: 240/77, loss: 6.049780836292484e-07 2023-01-24 00:15:55.664430: step: 244/77, loss: 2.0234774638083763e-06 2023-01-24 00:15:56.995130: step: 248/77, loss: 1.480571063439129e-05 2023-01-24 00:15:58.335587: step: 252/77, loss: 8.42006120365113e-05 2023-01-24 00:15:59.622115: step: 256/77, loss: 0.00019058524048887193 2023-01-24 00:16:00.901667: step: 260/77, loss: 0.04819241166114807 2023-01-24 00:16:02.207139: step: 264/77, loss: 2.4820839826134034e-05 2023-01-24 00:16:03.565620: step: 268/77, loss: 0.03885664418339729 2023-01-24 00:16:04.866952: step: 272/77, loss: 4.239127520122565e-06 2023-01-24 00:16:06.172314: step: 276/77, loss: 1.46977827171213e-05 2023-01-24 00:16:07.493607: step: 280/77, loss: 8.833389438223094e-05 2023-01-24 00:16:08.783073: step: 284/77, loss: 1.5189934856607579e-05 2023-01-24 00:16:10.079173: step: 288/77, loss: 4.405963863973739e-06 2023-01-24 00:16:11.375389: step: 292/77, loss: 4.4703088519781886e-07 2023-01-24 00:16:12.710366: step: 296/77, loss: 4.758917839353671e-06 2023-01-24 00:16:13.949811: step: 300/77, loss: 3.8925314584048465e-05 2023-01-24 00:16:15.207104: step: 304/77, loss: 0.017981529235839844 2023-01-24 00:16:16.542255: step: 308/77, loss: 1.9454097127891146e-05 2023-01-24 00:16:17.845886: step: 312/77, loss: 2.0712566595193493e-07 2023-01-24 00:16:19.106273: step: 316/77, loss: 0.0007723932503722608 2023-01-24 00:16:20.358709: step: 320/77, loss: 1.5888794223428704e-05 2023-01-24 00:16:21.638892: step: 324/77, loss: 1.3279091035656165e-05 2023-01-24 00:16:22.930046: step: 328/77, loss: 0.0009476257837377489 2023-01-24 00:16:24.266192: step: 332/77, loss: 0.512925922870636 2023-01-24 00:16:25.612006: step: 336/77, loss: 9.29001034819521e-05 2023-01-24 00:16:26.923133: step: 340/77, loss: 1.1510443073348142e-05 2023-01-24 00:16:28.250603: step: 344/77, loss: 3.337846692375024e-07 2023-01-24 00:16:29.521931: step: 348/77, loss: 1.0945473150059115e-05 2023-01-24 00:16:30.821856: step: 352/77, loss: 0.02763889729976654 2023-01-24 00:16:32.172851: step: 356/77, loss: 0.0008513939683325589 2023-01-24 00:16:33.458633: step: 360/77, loss: 0.0003253959002904594 2023-01-24 00:16:34.741672: step: 364/77, loss: 1.0048305739474017e-05 2023-01-24 00:16:36.013392: step: 368/77, loss: 0.0001279129646718502 2023-01-24 00:16:37.333113: step: 372/77, loss: 4.5150295591156464e-07 2023-01-24 00:16:38.662151: step: 376/77, loss: 0.0006518355221487582 2023-01-24 00:16:39.989157: step: 380/77, loss: 0.025820119306445122 2023-01-24 00:16:41.319600: step: 384/77, loss: 2.8324179766059387e-06 2023-01-24 00:16:42.589080: step: 388/77, loss: 0.00029903562972322106 ================================================== Loss: 0.010 -------------------- Dev Chinese: 
{'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 28} Test Chinese: {'template': {'p': 0.9714285714285714, 'r': 0.5396825396825397, 'f1': 0.6938775510204082}, 'slot': {'p': 0.6785714285714286, 'r': 0.01632302405498282, 'f1': 0.03187919463087248}, 'combined': 0.02212025749897274, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 28} Test Korean: {'template': {'p': 0.971830985915493, 'r': 0.5476190476190477, 'f1': 0.7005076142131981}, 'slot': {'p': 0.6333333333333333, 'r': 0.01632302405498282, 'f1': 0.031825795644891124}, 'combined': 0.02229421217763947, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 28} Test Russian: {'template': {'p': 0.9710144927536232, 'r': 0.5317460317460317, 'f1': 0.6871794871794872}, 'slot': {'p': 0.6428571428571429, 'r': 0.015463917525773196, 'f1': 0.030201342281879193}, 'combined': 0.020753742901393906, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 28} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6333333333333333, 'r': 0.01632302405498282, 'f1': 0.031825795644891124}, 'combined': 0.02077051926298157, 'epoch': 21} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 
'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 29 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-24 00:18:22.733252: step: 4/77, loss: 1.0125928383786231e-05 2023-01-24 00:18:24.065597: step: 8/77, loss: 0.0001004632722469978 2023-01-24 00:18:25.380975: step: 12/77, loss: 3.3080419825637364e-07 2023-01-24 00:18:26.673080: step: 16/77, loss: 0.05448224022984505 2023-01-24 00:18:27.973269: step: 20/77, loss: 1.299353243666701e-06 2023-01-24 00:18:29.181297: step: 24/77, loss: 0.0009496076963841915 2023-01-24 00:18:30.455865: step: 28/77, loss: 4.876495495409472e-06 2023-01-24 00:18:31.771847: step: 32/77, loss: 0.0002735431771725416 2023-01-24 00:18:33.065515: step: 36/77, loss: 0.02843218483030796 2023-01-24 00:18:34.387978: step: 40/77, loss: 0.03640910983085632 2023-01-24 00:18:35.690505: step: 44/77, loss: 0.0013255748199298978 2023-01-24 00:18:36.994619: step: 48/77, loss: 0.004339543171226978 2023-01-24 00:18:38.293794: step: 52/77, loss: 1.2665964277402963e-07 2023-01-24 00:18:39.592954: step: 56/77, loss: 1.5124039691727376e-06 2023-01-24 00:18:40.879610: step: 60/77, loss: 2.8548668069561245e-06 2023-01-24 00:18:42.201189: step: 64/77, loss: 2.2655663997284137e-05 2023-01-24 00:18:43.472912: step: 68/77, loss: 0.00011660682503134012 2023-01-24 00:18:44.782475: step: 72/77, loss: 5.665731350745773e-06 2023-01-24 00:18:46.088810: step: 76/77, loss: 1.2989067727176007e-05 2023-01-24 00:18:47.337740: step: 80/77, loss: 2.1367504814406857e-06 2023-01-24 00:18:48.607591: step: 84/77, loss: 0.0001403230126015842 2023-01-24 00:18:49.895685: step: 88/77, loss: 0.002301453612744808 2023-01-24 00:18:51.259686: step: 92/77, loss: 0.002005885588005185 2023-01-24 00:18:52.546083: step: 96/77, loss: 1.513984716439154e-05 2023-01-24 00:18:53.819691: step: 100/77, loss: 1.181620177703735e-06 2023-01-24 00:18:55.141244: step: 104/77, loss: 0.00012567572412081063 2023-01-24 00:18:56.466794: step: 108/77, loss: 0.009385241195559502 2023-01-24 00:18:57.801756: step: 112/77, loss: 3.427238084441342e-07 2023-01-24 00:18:59.092741: step: 116/77, loss: 6.252223101910204e-05 2023-01-24 00:19:00.369942: step: 120/77, loss: 0.00014491446199826896 2023-01-24 00:19:01.728702: step: 124/77, loss: 0.022527460008859634 2023-01-24 00:19:02.993449: step: 128/77, loss: 4.6917880354158115e-06 2023-01-24 00:19:04.243100: step: 132/77, loss: 6.645842631769483e-07 2023-01-24 00:19:05.531325: step: 136/77, loss: 0.016113124787807465 2023-01-24 00:19:06.812393: step: 140/77, loss: 0.00011516553058754653 2023-01-24 00:19:08.079648: step: 144/77, loss: 3.055720662814565e-05 2023-01-24 00:19:09.358022: step: 148/77, loss: 5.319628826327971e-07 2023-01-24 00:19:10.642795: step: 152/77, loss: 1.8640930647961795e-06 2023-01-24 00:19:11.951457: step: 156/77, loss: 0.0004583533154800534 2023-01-24 00:19:13.271292: step: 160/77, loss: 2.622595616230683e-07 2023-01-24 00:19:14.582465: step: 164/77, loss: 0.00019146154227200896 2023-01-24 00:19:15.814957: step: 168/77, loss: 5.960464122267695e-09 2023-01-24 00:19:17.174019: step: 172/77, loss: 9.83475558768987e-08 2023-01-24 00:19:18.488497: step: 176/77, loss: 0.001004547462798655 2023-01-24 00:19:19.737015: step: 180/77, loss: 3.3479436751804315e-06 2023-01-24 
00:19:21.051730: step: 184/77, loss: 3.759380433621118e-06 2023-01-24 00:19:22.352272: step: 188/77, loss: 0.00048191455425694585 2023-01-24 00:19:23.717132: step: 192/77, loss: 6.87026113155298e-05 2023-01-24 00:19:25.008074: step: 196/77, loss: 3.829567276625312e-07 2023-01-24 00:19:26.276782: step: 200/77, loss: 0.017844675108790398 2023-01-24 00:19:27.627528: step: 204/77, loss: 1.385804893061504e-07 2023-01-24 00:19:28.964722: step: 208/77, loss: 4.617567356035579e-06 2023-01-24 00:19:30.251545: step: 212/77, loss: 0.001500986167229712 2023-01-24 00:19:31.525825: step: 216/77, loss: 1.2352752492006402e-06 2023-01-24 00:19:32.813057: step: 220/77, loss: 8.01673536443559e-07 2023-01-24 00:19:34.127339: step: 224/77, loss: 7.582730904687196e-05 2023-01-24 00:19:35.430165: step: 228/77, loss: 0.0006666135741397738 2023-01-24 00:19:36.734337: step: 232/77, loss: 2.8460976864153054e-07 2023-01-24 00:19:38.082204: step: 236/77, loss: 1.0624310107232304e-06 2023-01-24 00:19:39.387644: step: 240/77, loss: 1.4111069503996987e-06 2023-01-24 00:19:40.672853: step: 244/77, loss: 6.237941306608263e-06 2023-01-24 00:19:41.994015: step: 248/77, loss: 5.6616836445755325e-06 2023-01-24 00:19:43.291511: step: 252/77, loss: 0.001264326274394989 2023-01-24 00:19:44.612392: step: 256/77, loss: 2.1888276933168527e-06 2023-01-24 00:19:45.929044: step: 260/77, loss: 5.51342402843602e-08 2023-01-24 00:19:47.217215: step: 264/77, loss: 8.299799105770944e-07 2023-01-24 00:19:48.468552: step: 268/77, loss: 0.0027380480896681547 2023-01-24 00:19:49.767964: step: 272/77, loss: 0.02482440136373043 2023-01-24 00:19:51.084357: step: 276/77, loss: 4.0913464545155875e-06 2023-01-24 00:19:52.411603: step: 280/77, loss: 0.00029373884899541736 2023-01-24 00:19:53.713817: step: 284/77, loss: 0.0003284189442638308 2023-01-24 00:19:55.000902: step: 288/77, loss: 1.0624784408719279e-05 2023-01-24 00:19:56.294274: step: 292/77, loss: 7.0821461122250184e-06 2023-01-24 00:19:57.535182: step: 296/77, loss: 9.566343806000077e-07 2023-01-24 00:19:58.841548: step: 300/77, loss: 3.900290175806731e-05 2023-01-24 00:20:00.164180: step: 304/77, loss: 0.0006133618298918009 2023-01-24 00:20:01.427058: step: 308/77, loss: 1.862150566012133e-05 2023-01-24 00:20:02.769919: step: 312/77, loss: 4.497852387430612e-06 2023-01-24 00:20:04.077336: step: 316/77, loss: 3.9124729482864495e-06 2023-01-24 00:20:05.385381: step: 320/77, loss: 0.03277048468589783 2023-01-24 00:20:06.673645: step: 324/77, loss: 1.156323924078606e-05 2023-01-24 00:20:07.989418: step: 328/77, loss: 0.00013471973943524063 2023-01-24 00:20:09.291035: step: 332/77, loss: 8.426280692219734e-05 2023-01-24 00:20:10.596515: step: 336/77, loss: 3.1888390594758675e-07 2023-01-24 00:20:11.956509: step: 340/77, loss: 4.12757827916721e-07 2023-01-24 00:20:13.243367: step: 344/77, loss: 0.03155367448925972 2023-01-24 00:20:14.534268: step: 348/77, loss: 2.0414552182046464e-07 2023-01-24 00:20:15.822713: step: 352/77, loss: 8.952095413405914e-06 2023-01-24 00:20:17.148153: step: 356/77, loss: 7.450580152834618e-09 2023-01-24 00:20:18.453274: step: 360/77, loss: 0.02338743396103382 2023-01-24 00:20:19.763901: step: 364/77, loss: 0.0003410083882045001 2023-01-24 00:20:21.136467: step: 368/77, loss: 2.2649727782209084e-07 2023-01-24 00:20:22.399506: step: 372/77, loss: 3.186542016919702e-05 2023-01-24 00:20:23.706145: step: 376/77, loss: 1.3230645890871529e-05 2023-01-24 00:20:24.977743: step: 380/77, loss: 9.23870828728468e-08 2023-01-24 00:20:26.281663: step: 384/77, loss: 
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 29}
Test Chinese: {'template': {'p': 0.9701492537313433, 'r': 0.5158730158730159, 'f1': 0.6735751295336788}, 'slot': {'p': 0.6666666666666666, 'r': 0.020618556701030927, 'f1': 0.04}, 'combined': 0.02694300518134715, 'epoch': 29}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 29}
Test Korean: {'template': {'p': 0.9565217391304348, 'r': 0.5238095238095238, 'f1': 0.6769230769230768}, 'slot': {'p': 0.631578947368421, 'r': 0.020618556701030927, 'f1': 0.03993344425956739}, 'combined': 0.027031869960322537, 'epoch': 29}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 29}
Test Russian: {'template': {'p': 0.9565217391304348, 'r': 0.5238095238095238, 'f1': 0.6769230769230768}, 'slot': {'p': 0.6052631578947368, 'r': 0.019759450171821305, 'f1': 0.038269550748752074}, 'combined': 0.025905542045309093, 'epoch': 29}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 29}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 29}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21}
Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6333333333333333, 'r': 0.01632302405498282, 'f1': 0.031825795644891124}, 'combined': 0.02077051926298157, 'epoch': 21}
Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2}
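For reference, each result dictionary above reports precision/recall/F1 for template predictions and for slot predictions, and the logged numbers are consistent with the 'combined' value being the product of the two F1 scores (e.g. Dev Chinese, epoch 29: 0.7234042553191489 * 0.0670194003527337 = 0.048482119404105226). A small sketch of that arithmetic, using hypothetical helper names not taken from train.py:

def prf(p, r):
    """Precision/recall/F1; F1 is the harmonic mean, 0.0 when p + r == 0."""
    f1 = 2 * p * r / (p + r) if (p + r) > 0 else 0.0
    return {'p': p, 'r': r, 'f1': f1}

def combined(template, slot):
    # Consistent with the logged values: combined = template F1 * slot F1
    return template['f1'] * slot['f1']

template = prf(1.0, 0.5666666666666667)        # Dev Chinese template scores, epoch 29
slot = prf(0.5, 0.035916824196597356)          # Dev Chinese slot scores, epoch 29
print(template['f1'], slot['f1'], combined(template, slot))
# -> f1 ~0.72340 and ~0.06702, combined ~0.04848, matching the logged Dev Chinese result up to rounding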
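The "Current best result" block is a running per-language selection: it keeps the epoch whose dev 'combined' score is the highest seen so far and reports that same epoch's test and sample results, which is why Chinese and Russian still point to epoch 2 and Korean to epoch 21 after 30 epochs. A minimal sketch of that bookkeeping (the dict layout and function name are assumptions for illustration, not the actual train.py internals):

# best[lang] stores the results of the epoch with the highest dev combined score so far.
best = {}

def update_best(lang, epoch, dev, test, sample):
    current = best.get(lang)
    if current is None or dev['combined'] > current['dev']['combined']:
        best[lang] = {'epoch': epoch, 'dev': dev, 'test': test, 'sample': sample}

# Example: at epoch 29 the Chinese dev combined score (~0.04848) does not exceed the
# epoch-2 value (~0.05180), so the stored best entry for Chinese stays at epoch 2.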