Command that produces this log: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 ---------------------------------------------------------------------------------------------------- > trainable params: >>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 
1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> 
xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: 
torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> trans_rep.weight: torch.Size([1024, 2048]) >>> trans_rep.bias: torch.Size([1024]) >>> hidden_ffns.Corruplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Corruplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Cybercrimeplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Cybercrimeplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Disasterplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Disasterplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Displacementplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Displacementplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Epidemiplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Epidemiplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Etiplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Etiplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Protestplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Protestplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Terrorplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Terrorplate.layers.0.bias: torch.Size([768]) >>> template_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Etiplate.layers.0.weight: 
torch.Size([450, 768]) >>> template_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) >>> type_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Corruplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Corruplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Disasterplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Disasterplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Displacementplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Displacementplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Epidemiplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Epidemiplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Etiplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Etiplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Protestplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Protestplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Terrorplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Terrorplate.layers.1.bias: torch.Size([6]) >>> completion_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Corruplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Corruplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> 
completion_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Disasterplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Disasterplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Displacementplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Displacementplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Epidemiplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Epidemiplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Etiplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Etiplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Protestplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Protestplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Terrorplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Terrorplate.layers.1.bias: torch.Size([3]) >>> overtime_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> 
overtime_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Terrorplate.layers.1.bias: torch.Size([2])
n_trainable_params: 582182328, n_nontrainable_params: 0
----------------------------------------------------------------------------------------------------
******************************
Epoch: 0
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10
--xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:15:10.689494: step: 4/77, loss: 1.0097957849502563 2023-01-22 09:15:12.188322: step: 8/77, loss: 0.9876657128334045 2023-01-22 09:15:13.603842: step: 12/77, loss: 1.006246566772461 2023-01-22 09:15:15.007718: step: 16/77, loss: 0.9947892427444458 2023-01-22 09:15:16.433147: step: 20/77, loss: 0.9852086305618286 2023-01-22 09:15:17.914050: step: 24/77, loss: 0.9930259585380554 2023-01-22 09:15:19.346365: step: 28/77, loss: 0.9912077784538269 2023-01-22 09:15:20.749377: step: 32/77, loss: 0.9729397296905518 2023-01-22 09:15:22.138383: step: 36/77, loss: 0.9823734164237976 2023-01-22 09:15:23.631712: step: 40/77, loss: 0.9720751643180847 2023-01-22 09:15:25.012123: step: 44/77, loss: 0.962503969669342 2023-01-22 09:15:26.460609: step: 48/77, loss: 0.9527497291564941 2023-01-22 09:15:27.857852: step: 52/77, loss: 0.9514380693435669 2023-01-22 09:15:29.268285: step: 56/77, loss: 0.9413738250732422 2023-01-22 09:15:30.715124: step: 60/77, loss: 0.925054669380188 2023-01-22 09:15:32.058352: step: 64/77, loss: 0.9168636798858643 2023-01-22 09:15:33.495628: step: 68/77, loss: 0.9203753471374512 2023-01-22 09:15:34.924307: step: 72/77, loss: 0.8916241526603699 2023-01-22 09:15:36.347040: step: 76/77, loss: 0.8798417448997498 2023-01-22 09:15:37.763258: step: 80/77, loss: 0.8681149482727051 2023-01-22 09:15:39.268946: step: 84/77, loss: 0.8756495714187622 2023-01-22 09:15:40.756279: step: 88/77, loss: 0.8527133464813232 2023-01-22 09:15:42.240158: step: 92/77, loss: 0.8297539949417114 2023-01-22 09:15:43.687744: step: 96/77, loss: 0.8228861093521118 2023-01-22 09:15:45.103485: step: 100/77, loss: 0.8132826089859009 2023-01-22 09:15:46.541161: step: 104/77, loss: 0.7874824404716492 2023-01-22 09:15:48.004095: step: 108/77, loss: 0.7657060623168945 2023-01-22 09:15:49.417044: step: 112/77, loss: 0.745045006275177 2023-01-22 09:15:50.926303: step: 116/77, loss: 0.7560614943504333 2023-01-22 09:15:52.308406: step: 120/77, loss: 0.7308070063591003 2023-01-22 09:15:53.779284: step: 124/77, loss: 0.7113009691238403 2023-01-22 09:15:55.157351: step: 128/77, loss: 0.6648023724555969 2023-01-22 09:15:56.612418: step: 132/77, loss: 0.6685682535171509 2023-01-22 09:15:58.001826: step: 136/77, loss: 0.6537583470344543 2023-01-22 09:15:59.452158: step: 140/77, loss: 0.6488573551177979 2023-01-22 09:16:00.875414: step: 144/77, loss: 0.6188699007034302 2023-01-22 09:16:02.329405: step: 148/77, loss: 0.545028567314148 2023-01-22 09:16:03.737923: step: 152/77, loss: 0.5712795257568359 2023-01-22 09:16:05.165476: step: 156/77, loss: 0.5583752393722534 2023-01-22 09:16:06.609249: step: 160/77, loss: 0.5152299404144287 2023-01-22 09:16:08.109533: step: 164/77, loss: 0.48409515619277954 2023-01-22 09:16:09.563093: step: 168/77, loss: 0.5172240138053894 2023-01-22 09:16:10.974197: step: 172/77, loss: 0.4879964590072632 2023-01-22 09:16:12.484877: step: 176/77, loss: 0.4531494379043579 2023-01-22 09:16:13.895395: step: 180/77, loss: 0.4241258203983307 2023-01-22 09:16:15.353815: step: 184/77, loss: 0.41498085856437683 2023-01-22 09:16:16.854806: step: 188/77, loss: 0.3981754183769226 2023-01-22 09:16:18.349525: step: 192/77, loss: 0.3640326261520386 2023-01-22 09:16:19.797635: step: 196/77, loss: 0.30751991271972656 2023-01-22 09:16:21.313961: step: 200/77, loss: 0.3940797448158264 2023-01-22 09:16:22.746156: step: 204/77, loss: 0.2844552993774414 2023-01-22 09:16:24.215900: step: 
208/77, loss: 0.30004727840423584 2023-01-22 09:16:25.614848: step: 212/77, loss: 0.219954252243042 2023-01-22 09:16:26.957190: step: 216/77, loss: 0.38010406494140625 2023-01-22 09:16:28.367815: step: 220/77, loss: 0.2126280814409256 2023-01-22 09:16:29.842574: step: 224/77, loss: 0.18236008286476135 2023-01-22 09:16:31.290863: step: 228/77, loss: 0.16203764081001282 2023-01-22 09:16:32.817486: step: 232/77, loss: 0.21170544624328613 2023-01-22 09:16:34.300390: step: 236/77, loss: 0.17526711523532867 2023-01-22 09:16:35.767127: step: 240/77, loss: 0.13736236095428467 2023-01-22 09:16:37.233821: step: 244/77, loss: 0.29424428939819336 2023-01-22 09:16:38.691039: step: 248/77, loss: 0.15658994019031525 2023-01-22 09:16:40.226666: step: 252/77, loss: 0.24072307348251343 2023-01-22 09:16:41.688060: step: 256/77, loss: 0.15842083096504211 2023-01-22 09:16:43.209729: step: 260/77, loss: 0.14580503106117249 2023-01-22 09:16:44.673638: step: 264/77, loss: 0.07843738049268723 2023-01-22 09:16:46.073474: step: 268/77, loss: 0.15364432334899902 2023-01-22 09:16:47.582840: step: 272/77, loss: 0.14609786868095398 2023-01-22 09:16:48.983005: step: 276/77, loss: 0.10194536298513412 2023-01-22 09:16:50.428354: step: 280/77, loss: 0.11051319539546967 2023-01-22 09:16:51.818251: step: 284/77, loss: 0.32462507486343384 2023-01-22 09:16:53.279362: step: 288/77, loss: 0.12179729342460632 2023-01-22 09:16:54.754931: step: 292/77, loss: 0.24305714666843414 2023-01-22 09:16:56.176970: step: 296/77, loss: 0.06196020543575287 2023-01-22 09:16:57.672824: step: 300/77, loss: 0.03596843406558037 2023-01-22 09:16:59.085394: step: 304/77, loss: 0.17993435263633728 2023-01-22 09:17:00.606578: step: 308/77, loss: 0.15721270442008972 2023-01-22 09:17:02.056787: step: 312/77, loss: 0.1377667635679245 2023-01-22 09:17:03.460400: step: 316/77, loss: 0.07205483317375183 2023-01-22 09:17:04.884722: step: 320/77, loss: 0.10653500258922577 2023-01-22 09:17:06.382325: step: 324/77, loss: 0.09064988791942596 2023-01-22 09:17:07.849349: step: 328/77, loss: 0.09795213490724564 2023-01-22 09:17:09.270195: step: 332/77, loss: 0.17470252513885498 2023-01-22 09:17:10.678019: step: 336/77, loss: 0.06503775715827942 2023-01-22 09:17:12.114568: step: 340/77, loss: 0.12823697924613953 2023-01-22 09:17:13.582892: step: 344/77, loss: 0.08256591856479645 2023-01-22 09:17:14.939250: step: 348/77, loss: 0.08200141787528992 2023-01-22 09:17:16.336837: step: 352/77, loss: 0.13781386613845825 2023-01-22 09:17:17.745181: step: 356/77, loss: 0.04991006851196289 2023-01-22 09:17:19.190062: step: 360/77, loss: 0.3172021806240082 2023-01-22 09:17:20.657713: step: 364/77, loss: 0.07039426267147064 2023-01-22 09:17:22.108671: step: 368/77, loss: 0.05509275197982788 2023-01-22 09:17:23.544233: step: 372/77, loss: 0.06914137303829193 2023-01-22 09:17:24.972976: step: 376/77, loss: 0.10850280523300171 2023-01-22 09:17:26.387377: step: 380/77, loss: 0.13166894018650055 2023-01-22 09:17:27.787214: step: 384/77, loss: 0.07740715146064758 2023-01-22 09:17:29.270116: step: 388/77, loss: 0.06276335567235947 ================================================== Loss: 0.460 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 
'epoch': 0}
Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
New best chinese model...
New best korean model...
New best russian model...
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
--------------------
Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
--------------------
Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
******************************
Epoch: 1
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-22 09:19:28.423208: step: 4/77, loss: 0.12122565507888794 2023-01-22 09:19:29.881262: step: 8/77, loss: 0.06975126266479492 2023-01-22 09:19:31.347041: step: 12/77, loss: 0.06883430480957031 2023-01-22 09:19:32.804378: step: 16/77, loss: 0.11424947530031204 2023-01-22 09:19:34.187419: step: 20/77, loss: 0.1402619183063507 2023-01-22 09:19:35.651575: step: 24/77, loss: 0.11052996665239334 2023-01-22 09:19:37.005854: step: 28/77, loss: 0.0690561980009079 2023-01-22 09:19:38.409346: step: 32/77, loss: 0.16084325313568115 2023-01-22 09:19:39.796148: step: 36/77, loss: 0.10384456813335419 2023-01-22 09:19:41.197995: step: 40/77, loss: 0.1343913972377777 2023-01-22 09:19:42.661908: step: 44/77, loss: 0.03423989191651344 2023-01-22 09:19:44.094177: step: 48/77, loss: 0.08117079734802246 2023-01-22 09:19:45.594320: step: 52/77, loss: 0.24305084347724915 2023-01-22 09:19:47.002164: step: 56/77, loss: 0.06955840438604355 2023-01-22 09:19:48.421452: step: 60/77, loss: 0.0856841653585434 2023-01-22 09:19:49.840376: step: 64/77, loss: 0.07438933849334717 2023-01-22 09:19:51.304228: step: 68/77,
loss: 0.13662199676036835 2023-01-22 09:19:52.747317: step: 72/77, loss: 0.19227546453475952 2023-01-22 09:19:54.083266: step: 76/77, loss: 0.19997961819171906 2023-01-22 09:19:55.514182: step: 80/77, loss: 0.25327473878860474 2023-01-22 09:19:56.961336: step: 84/77, loss: 0.12387385964393616 2023-01-22 09:19:58.390438: step: 88/77, loss: 0.08649803698062897 2023-01-22 09:19:59.897495: step: 92/77, loss: 0.11727029830217361 2023-01-22 09:20:01.275340: step: 96/77, loss: 0.13385401666164398 2023-01-22 09:20:02.738493: step: 100/77, loss: 0.042006321251392365 2023-01-22 09:20:04.135509: step: 104/77, loss: 0.13730527460575104 2023-01-22 09:20:05.541484: step: 108/77, loss: 0.11692111194133759 2023-01-22 09:20:06.967195: step: 112/77, loss: 0.09458288550376892 2023-01-22 09:20:08.431457: step: 116/77, loss: 0.13422513008117676 2023-01-22 09:20:09.837695: step: 120/77, loss: 0.11068707704544067 2023-01-22 09:20:11.227330: step: 124/77, loss: 0.1207418143749237 2023-01-22 09:20:12.739693: step: 128/77, loss: 0.12179193645715714 2023-01-22 09:20:14.131213: step: 132/77, loss: 0.28321126103401184 2023-01-22 09:20:15.599818: step: 136/77, loss: 0.07898715138435364 2023-01-22 09:20:17.000970: step: 140/77, loss: 0.10080458223819733 2023-01-22 09:20:18.487169: step: 144/77, loss: 0.11456061899662018 2023-01-22 09:20:19.934504: step: 148/77, loss: 0.03268774598836899 2023-01-22 09:20:21.356131: step: 152/77, loss: 0.07138832658529282 2023-01-22 09:20:22.770019: step: 156/77, loss: 0.07606998085975647 2023-01-22 09:20:24.229154: step: 160/77, loss: 0.0508023202419281 2023-01-22 09:20:25.694554: step: 164/77, loss: 0.11992565542459488 2023-01-22 09:20:27.110995: step: 168/77, loss: 0.3071916997432709 2023-01-22 09:20:28.551645: step: 172/77, loss: 0.050045184791088104 2023-01-22 09:20:30.020567: step: 176/77, loss: 0.07701658457517624 2023-01-22 09:20:31.476522: step: 180/77, loss: 0.06962858885526657 2023-01-22 09:20:32.951928: step: 184/77, loss: 0.05321573466062546 2023-01-22 09:20:34.338945: step: 188/77, loss: 0.04733038321137428 2023-01-22 09:20:35.774185: step: 192/77, loss: 0.09735030680894852 2023-01-22 09:20:37.222037: step: 196/77, loss: 0.16800400614738464 2023-01-22 09:20:38.716413: step: 200/77, loss: 0.08375978469848633 2023-01-22 09:20:40.183319: step: 204/77, loss: 0.10231446474790573 2023-01-22 09:20:41.581834: step: 208/77, loss: 0.11460596323013306 2023-01-22 09:20:43.064560: step: 212/77, loss: 0.20619343221187592 2023-01-22 09:20:44.535905: step: 216/77, loss: 0.061705246567726135 2023-01-22 09:20:46.013852: step: 220/77, loss: 0.05100015923380852 2023-01-22 09:20:47.480102: step: 224/77, loss: 0.14486533403396606 2023-01-22 09:20:48.900614: step: 228/77, loss: 0.0911254808306694 2023-01-22 09:20:50.340027: step: 232/77, loss: 0.06624509394168854 2023-01-22 09:20:51.807278: step: 236/77, loss: 0.09048707783222198 2023-01-22 09:20:53.277279: step: 240/77, loss: 0.09039495885372162 2023-01-22 09:20:54.763574: step: 244/77, loss: 0.13376855850219727 2023-01-22 09:20:56.164407: step: 248/77, loss: 0.1051325872540474 2023-01-22 09:20:57.618543: step: 252/77, loss: 0.12940678000450134 2023-01-22 09:20:59.095685: step: 256/77, loss: 0.10960350930690765 2023-01-22 09:21:00.587819: step: 260/77, loss: 0.31249040365219116 2023-01-22 09:21:02.021634: step: 264/77, loss: 0.13202910125255585 2023-01-22 09:21:03.486607: step: 268/77, loss: 0.12233851850032806 2023-01-22 09:21:04.923330: step: 272/77, loss: 0.11027183383703232 2023-01-22 09:21:06.366902: step: 276/77, loss: 0.08666466176509857 
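The step/loss lines in this log come from the flags in the command: --batch_size 10 together with --accumulate_step 4 means gradients are accumulated over 4 mini-batches (an effective batch of 40) before each optimizer update, which is consistent with the logged step counter advancing in multiples of 4, and the separate --xlmr_learning_rate 2e-5 and --learning_rate 2e-4 flags suggest distinct learning rates for the pretrained XLM-R encoder and the freshly initialized heads. Below is a minimal sketch of such an update loop with stand-in modules and data; the actual loop in train.py is not shown in this log.

```python
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

# Stand-ins for the real model and data; only the update schedule is the point here.
encoder = nn.Linear(16, 16)          # plays the role of the XLM-R encoder
heads = nn.Linear(16, 2)             # plays the role of the task-specific classifiers
loader = DataLoader(TensorDataset(torch.randn(80, 16), torch.randint(0, 2, (80,))),
                    batch_size=10)   # --batch_size 10

optimizer = torch.optim.AdamW([
    {"params": encoder.parameters(), "lr": 2e-5},   # --xlmr_learning_rate
    {"params": heads.parameters(), "lr": 2e-4},     # --learning_rate
])
criterion = nn.CrossEntropyLoss()
accumulate_step = 4                                  # --accumulate_step

optimizer.zero_grad()
for step, (x, y) in enumerate(loader, start=1):
    loss = criterion(heads(encoder(x)), y)
    (loss / accumulate_step).backward()              # scale so the update averages over 4 batches
    if step % accumulate_step == 0:
        optimizer.step()
        optimizer.zero_grad()
        print(f"step: {step}, loss: {loss.item()}")  # logged every 4th mini-batch, as above
```

Dividing the loss by accumulate_step keeps the accumulated gradient an average over the 40 examples rather than a sum, so the update behaves like one large batch.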
2023-01-22 09:21:07.772764: step: 280/77, loss: 0.16495409607887268 2023-01-22 09:21:09.198699: step: 284/77, loss: 0.04284519702196121 2023-01-22 09:21:10.644343: step: 288/77, loss: 0.06679358333349228 2023-01-22 09:21:12.168657: step: 292/77, loss: 0.19842630624771118 2023-01-22 09:21:13.653920: step: 296/77, loss: 0.08360859006643295 2023-01-22 09:21:15.065387: step: 300/77, loss: 0.14133182168006897 2023-01-22 09:21:16.587294: step: 304/77, loss: 0.10215363651514053 2023-01-22 09:21:17.989871: step: 308/77, loss: 0.06244270130991936 2023-01-22 09:21:19.393136: step: 312/77, loss: 0.08266845345497131 2023-01-22 09:21:20.818786: step: 316/77, loss: 0.1152535155415535 2023-01-22 09:21:22.264374: step: 320/77, loss: 0.1293995976448059 2023-01-22 09:21:23.740023: step: 324/77, loss: 0.021759741008281708 2023-01-22 09:21:25.174805: step: 328/77, loss: 0.0577072836458683 2023-01-22 09:21:26.550546: step: 332/77, loss: 0.052957139909267426 2023-01-22 09:21:27.998530: step: 336/77, loss: 0.028006015345454216 2023-01-22 09:21:29.436490: step: 340/77, loss: 0.1874495893716812 2023-01-22 09:21:30.797586: step: 344/77, loss: 0.030715854838490486 2023-01-22 09:21:32.187183: step: 348/77, loss: 0.06883832067251205 2023-01-22 09:21:33.631503: step: 352/77, loss: 0.04792249947786331 2023-01-22 09:21:35.015560: step: 356/77, loss: 0.054040245711803436 2023-01-22 09:21:36.416032: step: 360/77, loss: 0.08866354078054428 2023-01-22 09:21:37.908084: step: 364/77, loss: 0.1664765179157257 2023-01-22 09:21:39.319417: step: 368/77, loss: 0.14805133640766144 2023-01-22 09:21:40.742006: step: 372/77, loss: 0.15928471088409424 2023-01-22 09:21:42.222258: step: 376/77, loss: 0.08161871135234833 2023-01-22 09:21:43.695043: step: 380/77, loss: 0.07126414030790329 2023-01-22 09:21:45.130151: step: 384/77, loss: 0.14317861199378967 2023-01-22 09:21:46.553994: step: 388/77, loss: 0.05555611848831177 ================================================== Loss: 0.109 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'template': {'p': 0.0, 
'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 2 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:23:28.416587: step: 4/77, loss: 0.04601486772298813 2023-01-22 09:23:29.848475: step: 8/77, loss: 0.04817475378513336 2023-01-22 09:23:31.269540: step: 12/77, loss: 0.09909090399742126 2023-01-22 09:23:32.696031: step: 16/77, loss: 0.032988566905260086 2023-01-22 09:23:34.141594: step: 20/77, loss: 0.022017795592546463 2023-01-22 09:23:35.559908: step: 24/77, loss: 0.11184781789779663 2023-01-22 09:23:36.939158: step: 28/77, loss: 0.0631086677312851 2023-01-22 09:23:38.367941: step: 32/77, loss: 0.06439824402332306 2023-01-22 09:23:39.845807: step: 36/77, loss: 0.046433161944150925 2023-01-22 09:23:41.298518: step: 40/77, loss: 0.03827742859721184 2023-01-22 09:23:42.701333: step: 44/77, loss: 0.06895600259304047 2023-01-22 09:23:44.115371: step: 48/77, loss: 0.043843068182468414 2023-01-22 09:23:45.541417: step: 52/77, loss: 0.0473325178027153 2023-01-22 09:23:46.955650: step: 56/77, loss: 0.057506125420331955 2023-01-22 09:23:48.401029: step: 60/77, loss: 0.030457310378551483 2023-01-22 09:23:49.843421: step: 64/77, loss: 0.06665453314781189 2023-01-22 09:23:51.337924: step: 68/77, loss: 0.04986963048577309 2023-01-22 09:23:52.772788: step: 72/77, loss: 0.046548400074243546 2023-01-22 09:23:54.208010: step: 76/77, loss: 0.04934310540556908 2023-01-22 09:23:55.692640: step: 80/77, loss: 0.06297309696674347 2023-01-22 09:23:57.150047: step: 84/77, loss: 0.023975176736712456 2023-01-22 09:23:58.601849: step: 88/77, loss: 0.0738765150308609 2023-01-22 09:24:00.049687: step: 92/77, loss: 0.08002398908138275 2023-01-22 09:24:01.411733: step: 96/77, loss: 0.06041925027966499 2023-01-22 09:24:02.808936: step: 100/77, loss: 0.04932462424039841 2023-01-22 09:24:04.220156: step: 104/77, loss: 0.21958592534065247 2023-01-22 09:24:05.662843: step: 108/77, loss: 0.03155907243490219 2023-01-22 09:24:07.091345: step: 112/77, loss: 0.048089973628520966 2023-01-22 09:24:08.525780: step: 116/77, loss: 0.018027935177087784 2023-01-22 09:24:09.989795: step: 120/77, loss: 0.018941737711429596 2023-01-22 09:24:11.421300: step: 124/77, loss: 0.01080845482647419 2023-01-22 09:24:12.893953: step: 128/77, loss: 0.18481628596782684 2023-01-22 09:24:14.338294: step: 132/77, loss: 0.10867591202259064 2023-01-22 09:24:15.779339: step: 136/77, loss: 0.31321001052856445 2023-01-22 09:24:17.279448: step: 140/77, loss: 0.009379186667501926 2023-01-22 
09:24:18.725760: step: 144/77, loss: 0.013940312899649143 2023-01-22 09:24:20.149325: step: 148/77, loss: 0.03576299548149109 2023-01-22 09:24:21.611079: step: 152/77, loss: 0.026389723643660545 2023-01-22 09:24:23.019955: step: 156/77, loss: 0.03446223959326744 2023-01-22 09:24:24.452974: step: 160/77, loss: 0.020441459491848946 2023-01-22 09:24:25.929294: step: 164/77, loss: 0.036716707050800323 2023-01-22 09:24:27.349701: step: 168/77, loss: 0.010801638476550579 2023-01-22 09:24:28.780005: step: 172/77, loss: 0.029459888115525246 2023-01-22 09:24:30.228622: step: 176/77, loss: 0.11619746685028076 2023-01-22 09:24:31.643980: step: 180/77, loss: 0.006916141137480736 2023-01-22 09:24:33.106100: step: 184/77, loss: 0.15354542434215546 2023-01-22 09:24:34.511869: step: 188/77, loss: 0.011987213045358658 2023-01-22 09:24:35.966126: step: 192/77, loss: 0.07709883898496628 2023-01-22 09:24:37.417171: step: 196/77, loss: 0.17823737859725952 2023-01-22 09:24:38.826348: step: 200/77, loss: 0.014225020073354244 2023-01-22 09:24:40.289948: step: 204/77, loss: 0.05122522637248039 2023-01-22 09:24:41.784079: step: 208/77, loss: 0.109002485871315 2023-01-22 09:24:43.292771: step: 212/77, loss: 0.08840032666921616 2023-01-22 09:24:44.795385: step: 216/77, loss: 0.07572910189628601 2023-01-22 09:24:46.270967: step: 220/77, loss: 0.040880873799324036 2023-01-22 09:24:47.725777: step: 224/77, loss: 0.04236864671111107 2023-01-22 09:24:49.144926: step: 228/77, loss: 0.049025196582078934 2023-01-22 09:24:50.566395: step: 232/77, loss: 0.015840142965316772 2023-01-22 09:24:52.066408: step: 236/77, loss: 0.03713483363389969 2023-01-22 09:24:53.462623: step: 240/77, loss: 0.0560375340282917 2023-01-22 09:24:54.927831: step: 244/77, loss: 0.06949000805616379 2023-01-22 09:24:56.420103: step: 248/77, loss: 0.0347122959792614 2023-01-22 09:24:57.864272: step: 252/77, loss: 0.08734209835529327 2023-01-22 09:24:59.189157: step: 256/77, loss: 0.008405013009905815 2023-01-22 09:25:00.594476: step: 260/77, loss: 0.020795222371816635 2023-01-22 09:25:02.086460: step: 264/77, loss: 0.013448954559862614 2023-01-22 09:25:03.510377: step: 268/77, loss: 0.37937384843826294 2023-01-22 09:25:05.006664: step: 272/77, loss: 0.054609525948762894 2023-01-22 09:25:06.456534: step: 276/77, loss: 0.07847777754068375 2023-01-22 09:25:07.920911: step: 280/77, loss: 0.05036351457238197 2023-01-22 09:25:09.421341: step: 284/77, loss: 0.06301073729991913 2023-01-22 09:25:10.795319: step: 288/77, loss: 0.024898221716284752 2023-01-22 09:25:12.208822: step: 292/77, loss: 0.15771490335464478 2023-01-22 09:25:13.665316: step: 296/77, loss: 0.009197291918098927 2023-01-22 09:25:15.153166: step: 300/77, loss: 0.056442517787218094 2023-01-22 09:25:16.661421: step: 304/77, loss: 0.017659693956375122 2023-01-22 09:25:18.128817: step: 308/77, loss: 0.01524116937071085 2023-01-22 09:25:19.578449: step: 312/77, loss: 0.00637422502040863 2023-01-22 09:25:20.952074: step: 316/77, loss: 0.016708776354789734 2023-01-22 09:25:22.410721: step: 320/77, loss: 0.0594242662191391 2023-01-22 09:25:23.818362: step: 324/77, loss: 0.048527806997299194 2023-01-22 09:25:25.277144: step: 328/77, loss: 0.11545515805482864 2023-01-22 09:25:26.693500: step: 332/77, loss: 0.013203427195549011 2023-01-22 09:25:28.187521: step: 336/77, loss: 0.05984625220298767 2023-01-22 09:25:29.586757: step: 340/77, loss: 0.1080063208937645 2023-01-22 09:25:30.950221: step: 344/77, loss: 0.01844736561179161 2023-01-22 09:25:32.425154: step: 348/77, loss: 0.156895712018013 2023-01-22 
09:25:33.814673: step: 352/77, loss: 0.05376040190458298 2023-01-22 09:25:35.200815: step: 356/77, loss: 0.012940380722284317 2023-01-22 09:25:36.742105: step: 360/77, loss: 0.02959199622273445 2023-01-22 09:25:38.162987: step: 364/77, loss: 0.015051622875034809 2023-01-22 09:25:39.616563: step: 368/77, loss: 0.026665037497878075 2023-01-22 09:25:41.048883: step: 372/77, loss: 0.026100341230630875 2023-01-22 09:25:42.494053: step: 376/77, loss: 0.017636604607105255 2023-01-22 09:25:43.947649: step: 380/77, loss: 0.09775028377771378 2023-01-22 09:25:45.404936: step: 384/77, loss: 0.06577365100383759 2023-01-22 09:25:46.899012: step: 388/77, loss: 0.013629499822854996 ================================================== Loss: 0.060 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} New best chinese model... New best korean model... New best russian model... 
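[Editor's note] The log never prints the scoring code, but the evaluation dictionaries above are internally consistent with a standard interpretation: 'f1' is the harmonic mean of 'p' and 'r', and 'combined' is the product of the template F1 and the slot F1 (e.g. 0.7368421052631579 x 0.07029876977152899 = 0.05179909351586346 for epoch-2 Dev Chinese). The plain-Python sketch below only reproduces that arithmetic from the logged 'p'/'r' values; the helper names are hypothetical and this is a reconstruction for readability, not the project's actual metric code.

# Reconstruction of the scores reported in this log, under the assumed
# formulas: f1 = harmonic mean of p and r, combined = template_f1 * slot_f1.
def f1(p: float, r: float) -> float:
    return 0.0 if p + r == 0 else 2 * p * r / (p + r)

def combined_score(template: dict, slot: dict) -> float:
    return f1(template["p"], template["r"]) * f1(slot["p"], slot["r"])

# Epoch-2 Dev Chinese values copied from the log above.
template = {"p": 1.0, "r": 0.5833333333333334}
slot = {"p": 0.5, "r": 0.03780718336483932}
print(f1(template["p"], template["r"]))   # ~0.7368421052631579
print(f1(slot["p"], slot["r"]))           # ~0.07029876977152899
print(combined_score(template, slot))     # ~0.05179909351586346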
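[Editor's note] The "New best chinese/korean/russian model..." lines indicate that a per-language best checkpoint is tracked. Judging from the "Current best result" blocks that follow, the stored best appears to be replaced only when an epoch's dev 'combined' score strictly improves (later epochs that merely tie 0.0518 print no such message). The minimal sketch below illustrates that bookkeeping under that assumption; the names and the selection criterion are inferred, not taken from train.py.

# Hypothetical best-checkpoint bookkeeping consistent with this log:
# per language, keep the epoch with the highest dev 'combined' score,
# updating only on strict improvement.
best = {lang: {"combined": 0.0, "epoch": 0} for lang in ("chinese", "korean", "russian")}

def maybe_update_best(lang: str, dev_result: dict, epoch: int) -> bool:
    if dev_result["combined"] > best[lang]["combined"]:
        best[lang] = {"combined": dev_result["combined"], "epoch": epoch}
        print(f"New best {lang} model...")
        return True  # the training script would also save the checkpoint here
    return False

# Example with the epoch-2 Dev Chinese entry from the log above.
maybe_update_best("chinese", {"combined": 0.05179909351586346}, epoch=2)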
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:27:52.979299: step: 4/77, loss: 0.005971512757241726 2023-01-22 09:27:54.425538: step: 8/77, loss: 0.08986285328865051 2023-01-22 09:27:55.922420: step: 12/77, loss: 0.02581588551402092 2023-01-22 09:27:57.352396: step: 16/77, loss: 0.004304011818021536 2023-01-22 09:27:58.806609: step: 20/77, loss: 0.047521576285362244 2023-01-22 09:28:00.227857: step: 24/77, loss: 0.010420912876725197 2023-01-22 09:28:01.680602: step: 28/77, loss: 0.026402922347187996 2023-01-22 09:28:03.092261: step: 32/77, loss: 0.022887449711561203 2023-01-22 09:28:04.528018: step: 36/77, loss: 0.016893871128559113 2023-01-22 09:28:06.008818: step: 40/77, loss: 0.0178556926548481 2023-01-22 09:28:07.406849: step: 44/77, loss: 0.01378310564905405 2023-01-22 09:28:08.860180: step: 48/77, loss: 0.013840382918715477 2023-01-22 09:28:10.301616: step: 52/77, loss: 0.014913130551576614 2023-01-22 09:28:11.797599: step: 56/77, loss: 0.0072315772995352745 2023-01-22 09:28:13.284468: step: 60/77, loss: 0.06961221992969513 2023-01-22 09:28:14.708325: step: 64/77, loss: 0.011401453986763954 2023-01-22 09:28:16.112473: step: 68/77, loss: 0.0013695021625608206 2023-01-22 09:28:17.634373: step: 72/77, loss: 0.06069410964846611 2023-01-22 09:28:19.119792: step: 76/77, loss: 0.0035763708874583244 2023-01-22 09:28:20.650238: 
step: 80/77, loss: 0.004236510954797268 2023-01-22 09:28:22.063661: step: 84/77, loss: 0.010543843731284142 2023-01-22 09:28:23.481052: step: 88/77, loss: 0.02292841114103794 2023-01-22 09:28:24.864938: step: 92/77, loss: 0.006904575973749161 2023-01-22 09:28:26.280206: step: 96/77, loss: 0.0828978568315506 2023-01-22 09:28:27.689160: step: 100/77, loss: 0.11366309970617294 2023-01-22 09:28:29.102805: step: 104/77, loss: 0.06396742910146713 2023-01-22 09:28:30.578866: step: 108/77, loss: 0.003982246853411198 2023-01-22 09:28:32.044878: step: 112/77, loss: 0.03245307132601738 2023-01-22 09:28:33.544002: step: 116/77, loss: 0.003969680517911911 2023-01-22 09:28:35.040039: step: 120/77, loss: 0.000624045729637146 2023-01-22 09:28:36.499702: step: 124/77, loss: 0.0040672216564416885 2023-01-22 09:28:38.003296: step: 128/77, loss: 0.04338538646697998 2023-01-22 09:28:39.424694: step: 132/77, loss: 0.02423202060163021 2023-01-22 09:28:40.880533: step: 136/77, loss: 0.04370751604437828 2023-01-22 09:28:42.247366: step: 140/77, loss: 0.06708807498216629 2023-01-22 09:28:43.660340: step: 144/77, loss: 0.04993844032287598 2023-01-22 09:28:45.118795: step: 148/77, loss: 0.03304711729288101 2023-01-22 09:28:46.628244: step: 152/77, loss: 0.03260575234889984 2023-01-22 09:28:48.071250: step: 156/77, loss: 0.0576007217168808 2023-01-22 09:28:49.495873: step: 160/77, loss: 0.009530353359878063 2023-01-22 09:28:50.971532: step: 164/77, loss: 0.00638984702527523 2023-01-22 09:28:52.397754: step: 168/77, loss: 0.00925515778362751 2023-01-22 09:28:53.761192: step: 172/77, loss: 0.01808241754770279 2023-01-22 09:28:55.213736: step: 176/77, loss: 0.014505150727927685 2023-01-22 09:28:56.671326: step: 180/77, loss: 0.07409019023180008 2023-01-22 09:28:58.123964: step: 184/77, loss: 0.015948746353387833 2023-01-22 09:28:59.593535: step: 188/77, loss: 0.12979811429977417 2023-01-22 09:29:01.058700: step: 192/77, loss: 0.005695809610188007 2023-01-22 09:29:02.467075: step: 196/77, loss: 0.003760495688766241 2023-01-22 09:29:03.930954: step: 200/77, loss: 0.1478642076253891 2023-01-22 09:29:05.376181: step: 204/77, loss: 0.004346428904682398 2023-01-22 09:29:06.802342: step: 208/77, loss: 0.01687292382121086 2023-01-22 09:29:08.246653: step: 212/77, loss: 0.039575885981321335 2023-01-22 09:29:09.671638: step: 216/77, loss: 0.030470095574855804 2023-01-22 09:29:11.181066: step: 220/77, loss: 0.17650803923606873 2023-01-22 09:29:12.635512: step: 224/77, loss: 0.028570353984832764 2023-01-22 09:29:14.075156: step: 228/77, loss: 0.002066076500341296 2023-01-22 09:29:15.555309: step: 232/77, loss: 0.01230506133288145 2023-01-22 09:29:16.983364: step: 236/77, loss: 0.021028703078627586 2023-01-22 09:29:18.406476: step: 240/77, loss: 0.041566163301467896 2023-01-22 09:29:19.913059: step: 244/77, loss: 0.03247467800974846 2023-01-22 09:29:21.361111: step: 248/77, loss: 0.03296036273241043 2023-01-22 09:29:22.775702: step: 252/77, loss: 0.06024741008877754 2023-01-22 09:29:24.240918: step: 256/77, loss: 0.009157262742519379 2023-01-22 09:29:25.679774: step: 260/77, loss: 0.06374485045671463 2023-01-22 09:29:27.182361: step: 264/77, loss: 0.032092105597257614 2023-01-22 09:29:28.525555: step: 268/77, loss: 0.07532630860805511 2023-01-22 09:29:29.941643: step: 272/77, loss: 0.011283449828624725 2023-01-22 09:29:31.410056: step: 276/77, loss: 0.08449389040470123 2023-01-22 09:29:32.810409: step: 280/77, loss: 0.03654472157359123 2023-01-22 09:29:34.279363: step: 284/77, loss: 0.11908509582281113 2023-01-22 09:29:35.701984: 
step: 288/77, loss: 0.05096805468201637 2023-01-22 09:29:37.200593: step: 292/77, loss: 0.017733121290802956 2023-01-22 09:29:38.560155: step: 296/77, loss: 0.011005227454006672 2023-01-22 09:29:40.013864: step: 300/77, loss: 0.09521736204624176 2023-01-22 09:29:41.472681: step: 304/77, loss: 0.0839010700583458 2023-01-22 09:29:42.865497: step: 308/77, loss: 0.009480705484747887 2023-01-22 09:29:44.315365: step: 312/77, loss: 0.05214642733335495 2023-01-22 09:29:45.765824: step: 316/77, loss: 0.017187777906656265 2023-01-22 09:29:47.284116: step: 320/77, loss: 0.013568413443863392 2023-01-22 09:29:48.777944: step: 324/77, loss: 0.02575734630227089 2023-01-22 09:29:50.198881: step: 328/77, loss: 0.05008266866207123 2023-01-22 09:29:51.675874: step: 332/77, loss: 0.034605540335178375 2023-01-22 09:29:53.158273: step: 336/77, loss: 0.019046058878302574 2023-01-22 09:29:54.615077: step: 340/77, loss: 0.06698395311832428 2023-01-22 09:29:56.087136: step: 344/77, loss: 0.04300406947731972 2023-01-22 09:29:57.526865: step: 348/77, loss: 0.024131231009960175 2023-01-22 09:29:58.948168: step: 352/77, loss: 0.03506898507475853 2023-01-22 09:30:00.482397: step: 356/77, loss: 0.11473017930984497 2023-01-22 09:30:01.870227: step: 360/77, loss: 0.031168891116976738 2023-01-22 09:30:03.329813: step: 364/77, loss: 0.015158976428210735 2023-01-22 09:30:04.806772: step: 368/77, loss: 0.07996613532304764 2023-01-22 09:30:06.315156: step: 372/77, loss: 0.04004429280757904 2023-01-22 09:30:07.732011: step: 376/77, loss: 0.03189512714743614 2023-01-22 09:30:09.225674: step: 380/77, loss: 0.017405323684215546 2023-01-22 09:30:10.657143: step: 384/77, loss: 0.0033155945129692554 2023-01-22 09:30:12.145254: step: 388/77, loss: 0.029796740040183067 ================================================== Loss: 0.037 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Chinese: {'template': {'p': 0.9803921568627451, 'r': 0.390625, 'f1': 0.5586592178770949}, 'slot': {'p': 0.6538461538461539, 'r': 0.016683022571148183, 'f1': 0.03253588516746411}, 'combined': 0.018176472160594474, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Korean: {'template': {'p': 0.9803921568627451, 'r': 0.390625, 'f1': 0.5586592178770949}, 'slot': {'p': 0.6538461538461539, 'r': 0.016683022571148183, 'f1': 0.03253588516746411}, 'combined': 0.018176472160594474, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Russian: {'template': {'p': 0.9803921568627451, 'r': 0.390625, 'f1': 0.5586592178770949}, 'slot': {'p': 0.6538461538461539, 'r': 0.016683022571148183, 'f1': 0.03253588516746411}, 'combined': 0.018176472160594474, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 
'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 4 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:31:54.839139: step: 4/77, loss: 0.06511770933866501 2023-01-22 09:31:56.269196: step: 8/77, loss: 0.02227628417313099 2023-01-22 09:31:57.681640: step: 12/77, loss: 0.02134661376476288 2023-01-22 09:31:59.170258: step: 16/77, loss: 0.09239403158426285 2023-01-22 09:32:00.620003: step: 20/77, loss: 0.013236569240689278 2023-01-22 09:32:02.075096: step: 24/77, loss: 0.04262635484337807 2023-01-22 09:32:03.512158: step: 28/77, loss: 0.04335910826921463 2023-01-22 09:32:04.935884: step: 32/77, loss: 0.11229270696640015 2023-01-22 09:32:06.416252: step: 36/77, loss: 0.020496727898716927 2023-01-22 09:32:07.880805: step: 40/77, loss: 0.008443008176982403 2023-01-22 09:32:09.309365: step: 44/77, loss: 0.05217698588967323 2023-01-22 09:32:10.741526: step: 48/77, loss: 0.03275950998067856 2023-01-22 09:32:12.188584: step: 52/77, loss: 0.04591575637459755 2023-01-22 09:32:13.645178: step: 56/77, loss: 0.009700889699161053 2023-01-22 09:32:15.148076: step: 60/77, loss: 0.026869626715779305 2023-01-22 09:32:16.567477: step: 64/77, loss: 0.012734868563711643 2023-01-22 09:32:17.962947: step: 68/77, loss: 0.004796100780367851 2023-01-22 09:32:19.374038: step: 72/77, loss: 
0.05096709728240967 2023-01-22 09:32:20.763418: step: 76/77, loss: 0.03370915725827217 2023-01-22 09:32:22.170665: step: 80/77, loss: 0.021908100694417953 2023-01-22 09:32:23.628640: step: 84/77, loss: 0.01133977621793747 2023-01-22 09:32:25.020161: step: 88/77, loss: 0.022827230393886566 2023-01-22 09:32:26.508123: step: 92/77, loss: 0.03184092789888382 2023-01-22 09:32:27.950588: step: 96/77, loss: 0.04128416255116463 2023-01-22 09:32:29.362738: step: 100/77, loss: 0.07630272209644318 2023-01-22 09:32:30.833115: step: 104/77, loss: 0.0071154991164803505 2023-01-22 09:32:32.283979: step: 108/77, loss: 0.008922640234231949 2023-01-22 09:32:33.749963: step: 112/77, loss: 0.022041644901037216 2023-01-22 09:32:35.225598: step: 116/77, loss: 0.008678006939589977 2023-01-22 09:32:36.691203: step: 120/77, loss: 0.0035237190313637257 2023-01-22 09:32:38.090087: step: 124/77, loss: 0.025985462591052055 2023-01-22 09:32:39.461486: step: 128/77, loss: 0.0029498650692403316 2023-01-22 09:32:40.987680: step: 132/77, loss: 0.05594632774591446 2023-01-22 09:32:42.394591: step: 136/77, loss: 0.013143017888069153 2023-01-22 09:32:43.856531: step: 140/77, loss: 0.014098651707172394 2023-01-22 09:32:45.275227: step: 144/77, loss: 0.014548598788678646 2023-01-22 09:32:46.681366: step: 148/77, loss: 0.022157195955514908 2023-01-22 09:32:48.104787: step: 152/77, loss: 0.023342769593000412 2023-01-22 09:32:49.591146: step: 156/77, loss: 0.07096787542104721 2023-01-22 09:32:51.056221: step: 160/77, loss: 0.041673459112644196 2023-01-22 09:32:52.461367: step: 164/77, loss: 0.0136954914778471 2023-01-22 09:32:53.879199: step: 168/77, loss: 0.01048867218196392 2023-01-22 09:32:55.273534: step: 172/77, loss: 0.04753701761364937 2023-01-22 09:32:56.711326: step: 176/77, loss: 0.015834391117095947 2023-01-22 09:32:58.181695: step: 180/77, loss: 0.009923559613525867 2023-01-22 09:32:59.664545: step: 184/77, loss: 0.1431715339422226 2023-01-22 09:33:01.094418: step: 188/77, loss: 0.07374101132154465 2023-01-22 09:33:02.473515: step: 192/77, loss: 0.04399900138378143 2023-01-22 09:33:03.889819: step: 196/77, loss: 0.03910788521170616 2023-01-22 09:33:05.371512: step: 200/77, loss: 0.03297842666506767 2023-01-22 09:33:06.775855: step: 204/77, loss: 0.06826893985271454 2023-01-22 09:33:08.303624: step: 208/77, loss: 0.006327507086098194 2023-01-22 09:33:09.797361: step: 212/77, loss: 0.004379096440970898 2023-01-22 09:33:11.154869: step: 216/77, loss: 0.021066607907414436 2023-01-22 09:33:12.643165: step: 220/77, loss: 0.03417379409074783 2023-01-22 09:33:14.126212: step: 224/77, loss: 0.05092516541481018 2023-01-22 09:33:15.544872: step: 228/77, loss: 0.013350581750273705 2023-01-22 09:33:17.023030: step: 232/77, loss: 0.012862302362918854 2023-01-22 09:33:18.505850: step: 236/77, loss: 0.0070504057221114635 2023-01-22 09:33:19.949705: step: 240/77, loss: 0.06666646897792816 2023-01-22 09:33:21.402157: step: 244/77, loss: 0.0014184003230184317 2023-01-22 09:33:22.836680: step: 248/77, loss: 0.018787948414683342 2023-01-22 09:33:24.274945: step: 252/77, loss: 0.04476185888051987 2023-01-22 09:33:25.693267: step: 256/77, loss: 0.08340482413768768 2023-01-22 09:33:27.129693: step: 260/77, loss: 0.0024039025884121656 2023-01-22 09:33:28.485663: step: 264/77, loss: 0.025625744834542274 2023-01-22 09:33:29.954693: step: 268/77, loss: 0.013658175244927406 2023-01-22 09:33:31.373767: step: 272/77, loss: 0.062216561287641525 2023-01-22 09:33:32.788307: step: 276/77, loss: 0.03463776782155037 2023-01-22 09:33:34.251457: step: 
280/77, loss: 0.003395059145987034 2023-01-22 09:33:35.687334: step: 284/77, loss: 0.10502924025058746 2023-01-22 09:33:37.091816: step: 288/77, loss: 0.06791075319051743 2023-01-22 09:33:38.529294: step: 292/77, loss: 0.04715389013290405 2023-01-22 09:33:40.021016: step: 296/77, loss: 0.024910500273108482 2023-01-22 09:33:41.463164: step: 300/77, loss: 0.02448110841214657 2023-01-22 09:33:42.937621: step: 304/77, loss: 0.015895595774054527 2023-01-22 09:33:44.406215: step: 308/77, loss: 0.05485811457037926 2023-01-22 09:33:45.832098: step: 312/77, loss: 0.05560028553009033 2023-01-22 09:33:47.323745: step: 316/77, loss: 0.057130005210638046 2023-01-22 09:33:48.718232: step: 320/77, loss: 0.020813550800085068 2023-01-22 09:33:50.120768: step: 324/77, loss: 0.05507659539580345 2023-01-22 09:33:51.616571: step: 328/77, loss: 0.042632970958948135 2023-01-22 09:33:53.082640: step: 332/77, loss: 0.017948759719729424 2023-01-22 09:33:54.488540: step: 336/77, loss: 0.025418804958462715 2023-01-22 09:33:56.004996: step: 340/77, loss: 0.01243533007800579 2023-01-22 09:33:57.438676: step: 344/77, loss: 0.008232533000409603 2023-01-22 09:33:58.908873: step: 348/77, loss: 0.005863835569471121 2023-01-22 09:34:00.396020: step: 352/77, loss: 0.0137371476739645 2023-01-22 09:34:01.854264: step: 356/77, loss: 0.04229208081960678 2023-01-22 09:34:03.266494: step: 360/77, loss: 0.018152443692088127 2023-01-22 09:34:04.699390: step: 364/77, loss: 0.016206610947847366 2023-01-22 09:34:06.136957: step: 368/77, loss: 0.05746857821941376 2023-01-22 09:34:07.618617: step: 372/77, loss: 0.049862105399370193 2023-01-22 09:34:09.044676: step: 376/77, loss: 0.00593891367316246 2023-01-22 09:34:10.464021: step: 380/77, loss: 0.02611643448472023 2023-01-22 09:34:11.913792: step: 384/77, loss: 0.0411229208111763 2023-01-22 09:34:13.395921: step: 388/77, loss: 0.017169751226902008 ================================================== Loss: 0.033 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.48333333333333334, 'f1': 0.6516853932584269}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.04367556427481521, 'epoch': 4} Test Chinese: {'template': {'p': 0.8970588235294118, 'r': 0.4765625, 'f1': 0.6224489795918369}, 'slot': {'p': 0.6111111111111112, 'r': 0.010794896957801767, 'f1': 0.02121504339440694}, 'combined': 0.01320528211284514, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.48333333333333334, 'f1': 0.6516853932584269}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.041523317092572336, 'epoch': 4} Test Korean: {'template': {'p': 0.9104477611940298, 'r': 0.4765625, 'f1': 0.6256410256410256}, 'slot': {'p': 0.6111111111111112, 'r': 0.010794896957801767, 'f1': 0.02121504339440694}, 'combined': 0.013273001508295624, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.4666666666666667, 'f1': 0.6363636363636364}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.040547063555913115, 'epoch': 4} Test Russian: {'template': {'p': 0.9104477611940298, 'r': 0.4765625, 'f1': 0.6256410256410256}, 'slot': {'p': 0.6111111111111112, 'r': 0.010794896957801767, 'f1': 0.02121504339440694}, 'combined': 0.013273001508295624, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 
0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 5 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:35:55.733234: step: 4/77, loss: 0.08117157220840454 2023-01-22 09:35:57.226454: step: 8/77, loss: 0.03891678526997566 2023-01-22 09:35:58.637175: step: 12/77, loss: 0.018629007041454315 2023-01-22 09:36:00.038208: step: 16/77, loss: 0.024092990905046463 2023-01-22 09:36:01.535313: step: 20/77, loss: 0.003024669596925378 2023-01-22 09:36:02.920555: step: 24/77, loss: 0.02051519975066185 2023-01-22 09:36:04.350546: step: 28/77, loss: 0.0053480276837944984 2023-01-22 09:36:05.824912: step: 32/77, loss: 0.0028783066663891077 2023-01-22 09:36:07.279976: step: 36/77, loss: 0.04187828302383423 2023-01-22 09:36:08.729245: step: 40/77, loss: 0.01745690405368805 2023-01-22 09:36:10.174212: step: 44/77, loss: 0.01910899206995964 2023-01-22 09:36:11.601581: step: 48/77, loss: 0.006147223990410566 2023-01-22 09:36:13.104110: step: 52/77, loss: 0.015471047721803188 2023-01-22 09:36:14.495982: step: 56/77, loss: 0.021297479048371315 2023-01-22 09:36:15.967204: step: 60/77, loss: 0.016664618626236916 2023-01-22 09:36:17.412391: step: 64/77, loss: 0.10081305354833603 2023-01-22 09:36:18.879982: step: 68/77, loss: 
0.02595415711402893 2023-01-22 09:36:20.336438: step: 72/77, loss: 0.016515161842107773 2023-01-22 09:36:21.893643: step: 76/77, loss: 0.04394027963280678 2023-01-22 09:36:23.326428: step: 80/77, loss: 0.010788904502987862 2023-01-22 09:36:24.743504: step: 84/77, loss: 0.014180425554513931 2023-01-22 09:36:26.168216: step: 88/77, loss: 0.021911922842264175 2023-01-22 09:36:27.635834: step: 92/77, loss: 0.04131493717432022 2023-01-22 09:36:29.071529: step: 96/77, loss: 0.02815232425928116 2023-01-22 09:36:30.483614: step: 100/77, loss: 0.04688156023621559 2023-01-22 09:36:31.949620: step: 104/77, loss: 0.04534637928009033 2023-01-22 09:36:33.374032: step: 108/77, loss: 0.016092343255877495 2023-01-22 09:36:34.832061: step: 112/77, loss: 0.035738199949264526 2023-01-22 09:36:36.259326: step: 116/77, loss: 0.00904413778334856 2023-01-22 09:36:37.687754: step: 120/77, loss: 0.03285384550690651 2023-01-22 09:36:39.087481: step: 124/77, loss: 0.005159964319318533 2023-01-22 09:36:40.488736: step: 128/77, loss: 0.15879254043102264 2023-01-22 09:36:41.950526: step: 132/77, loss: 0.005956803448498249 2023-01-22 09:36:43.380405: step: 136/77, loss: 0.012946855276823044 2023-01-22 09:36:44.758563: step: 140/77, loss: 0.0126056969165802 2023-01-22 09:36:46.216411: step: 144/77, loss: 0.021381687372922897 2023-01-22 09:36:47.637219: step: 148/77, loss: 0.0045577725395560265 2023-01-22 09:36:49.116193: step: 152/77, loss: 0.22539357841014862 2023-01-22 09:36:50.508815: step: 156/77, loss: 0.010866387747228146 2023-01-22 09:36:52.012725: step: 160/77, loss: 0.0005665660719387233 2023-01-22 09:36:53.489160: step: 164/77, loss: 0.03323635086417198 2023-01-22 09:36:54.957864: step: 168/77, loss: 0.008901823312044144 2023-01-22 09:36:56.434819: step: 172/77, loss: 0.06111481040716171 2023-01-22 09:36:57.933870: step: 176/77, loss: 0.0703057274222374 2023-01-22 09:36:59.338062: step: 180/77, loss: 0.02014414593577385 2023-01-22 09:37:00.751059: step: 184/77, loss: 0.03161013126373291 2023-01-22 09:37:02.181710: step: 188/77, loss: 0.002897855592891574 2023-01-22 09:37:03.650138: step: 192/77, loss: 0.016166338697075844 2023-01-22 09:37:05.095018: step: 196/77, loss: 0.018734036013484 2023-01-22 09:37:06.514152: step: 200/77, loss: 0.06107534095644951 2023-01-22 09:37:07.937104: step: 204/77, loss: 0.002570316195487976 2023-01-22 09:37:09.421158: step: 208/77, loss: 0.10831344127655029 2023-01-22 09:37:10.885681: step: 212/77, loss: 0.003239275421947241 2023-01-22 09:37:12.341879: step: 216/77, loss: 0.03504738211631775 2023-01-22 09:37:13.772345: step: 220/77, loss: 0.05260982736945152 2023-01-22 09:37:15.205655: step: 224/77, loss: 0.002353373449295759 2023-01-22 09:37:16.675696: step: 228/77, loss: 0.05163984000682831 2023-01-22 09:37:18.203622: step: 232/77, loss: 0.02085479535162449 2023-01-22 09:37:19.711815: step: 236/77, loss: 0.009827936068177223 2023-01-22 09:37:21.193280: step: 240/77, loss: 0.025967005640268326 2023-01-22 09:37:22.639706: step: 244/77, loss: 0.004151183180510998 2023-01-22 09:37:24.067372: step: 248/77, loss: 0.0167071670293808 2023-01-22 09:37:25.530793: step: 252/77, loss: 0.048930615186691284 2023-01-22 09:37:26.931669: step: 256/77, loss: 0.06841599941253662 2023-01-22 09:37:28.421052: step: 260/77, loss: 0.027259133756160736 2023-01-22 09:37:29.877834: step: 264/77, loss: 0.07450323551893234 2023-01-22 09:37:31.411465: step: 268/77, loss: 0.04453302174806595 2023-01-22 09:37:32.830558: step: 272/77, loss: 0.006317660212516785 2023-01-22 09:37:34.239795: step: 276/77, loss: 
0.015550298616290092 2023-01-22 09:37:35.674047: step: 280/77, loss: 0.04075699299573898 2023-01-22 09:37:37.071817: step: 284/77, loss: 0.028539283201098442 2023-01-22 09:37:38.529026: step: 288/77, loss: 0.09774263203144073 2023-01-22 09:37:40.046399: step: 292/77, loss: 0.011181055568158627 2023-01-22 09:37:41.471594: step: 296/77, loss: 0.014936857856810093 2023-01-22 09:37:42.912001: step: 300/77, loss: 0.04972505569458008 2023-01-22 09:37:44.261829: step: 304/77, loss: 0.029812775552272797 2023-01-22 09:37:45.695544: step: 308/77, loss: 0.06259914487600327 2023-01-22 09:37:47.191687: step: 312/77, loss: 0.0034338203258812428 2023-01-22 09:37:48.668019: step: 316/77, loss: 0.07357652485370636 2023-01-22 09:37:50.075783: step: 320/77, loss: 0.013458916917443275 2023-01-22 09:37:51.557767: step: 324/77, loss: 0.10706418007612228 2023-01-22 09:37:52.974074: step: 328/77, loss: 0.061417315155267715 2023-01-22 09:37:54.441342: step: 332/77, loss: 0.013865873217582703 2023-01-22 09:37:55.873217: step: 336/77, loss: 0.023164033889770508 2023-01-22 09:37:57.311490: step: 340/77, loss: 0.017712213099002838 2023-01-22 09:37:58.773245: step: 344/77, loss: 0.002024821937084198 2023-01-22 09:38:00.183326: step: 348/77, loss: 0.00024450241471640766 2023-01-22 09:38:01.628105: step: 352/77, loss: 0.006608347408473492 2023-01-22 09:38:03.087551: step: 356/77, loss: 0.08975375443696976 2023-01-22 09:38:04.561442: step: 360/77, loss: 0.0048473007045686245 2023-01-22 09:38:05.957374: step: 364/77, loss: 0.03499084711074829 2023-01-22 09:38:07.385559: step: 368/77, loss: 0.026558957993984222 2023-01-22 09:38:08.833523: step: 372/77, loss: 0.00982342567294836 2023-01-22 09:38:10.295373: step: 376/77, loss: 0.003917289432138205 2023-01-22 09:38:11.738175: step: 380/77, loss: 0.03307020291686058 2023-01-22 09:38:13.229345: step: 384/77, loss: 0.06564565747976303 2023-01-22 09:38:14.643921: step: 388/77, loss: 0.06091843172907829 ================================================== Loss: 0.033 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.3333333333333333, 'f1': 0.5}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.035149384885764495, 'epoch': 5} Test Chinese: {'template': {'p': 0.9193548387096774, 'r': 0.4453125, 'f1': 0.5999999999999999}, 'slot': {'p': 0.6190476190476191, 'r': 0.012757605495583905, 'f1': 0.024999999999999998}, 'combined': 0.014999999999999996, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.3333333333333333, 'f1': 0.5}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.035149384885764495, 'epoch': 5} Test Korean: {'template': {'p': 0.9193548387096774, 'r': 0.4453125, 'f1': 0.5999999999999999}, 'slot': {'p': 0.6190476190476191, 'r': 0.012757605495583905, 'f1': 0.024999999999999998}, 'combined': 0.014999999999999996, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.3333333333333333, 'f1': 0.5}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.035149384885764495, 'epoch': 5} Test Russian: {'template': {'p': 0.9047619047619048, 'r': 0.4453125, 'f1': 0.5968586387434556}, 'slot': {'p': 0.6190476190476191, 'r': 0.012757605495583905, 'f1': 0.024999999999999998}, 'combined': 0.014921465968586388, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.021621621621621623, 'epoch': 5} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': 
{'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.025806451612903226, 'epoch': 5} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 6 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:39:59.850502: step: 4/77, loss: 0.01459469459950924 2023-01-22 09:40:01.387599: step: 8/77, loss: 0.3601420819759369 2023-01-22 09:40:02.876174: step: 12/77, loss: 0.005000119097530842 2023-01-22 09:40:04.330628: step: 16/77, loss: 0.013874746859073639 2023-01-22 09:40:05.700062: step: 20/77, loss: 0.010326599702239037 2023-01-22 09:40:07.197832: step: 24/77, loss: 0.043253205716609955 2023-01-22 09:40:08.665621: step: 28/77, loss: 0.02528899721801281 2023-01-22 09:40:10.088568: step: 32/77, loss: 0.013174970634281635 2023-01-22 09:40:11.470589: step: 36/77, loss: 0.008078305050730705 2023-01-22 09:40:12.962051: step: 40/77, loss: 0.015973033383488655 2023-01-22 09:40:14.476580: step: 44/77, loss: 0.02415720373392105 2023-01-22 09:40:16.007618: step: 48/77, loss: 0.0253595057874918 2023-01-22 09:40:17.412947: step: 52/77, loss: 0.027250634506344795 2023-01-22 09:40:18.868202: step: 56/77, loss: 0.013301963917911053 2023-01-22 09:40:20.327350: step: 60/77, loss: 0.026193415746092796 2023-01-22 09:40:21.825609: step: 64/77, loss: 
0.004933602176606655 2023-01-22 09:40:23.322529: step: 68/77, loss: 0.0014216202544048429 2023-01-22 09:40:24.757128: step: 72/77, loss: 0.018952487036585808 2023-01-22 09:40:26.193776: step: 76/77, loss: 0.052897218614816666 2023-01-22 09:40:27.551547: step: 80/77, loss: 0.0038756094872951508 2023-01-22 09:40:28.977916: step: 84/77, loss: 0.005164342932403088 2023-01-22 09:40:30.411138: step: 88/77, loss: 0.0174753088504076 2023-01-22 09:40:31.846129: step: 92/77, loss: 0.03833255544304848 2023-01-22 09:40:33.273799: step: 96/77, loss: 0.009861546568572521 2023-01-22 09:40:34.737702: step: 100/77, loss: 0.027784064412117004 2023-01-22 09:40:36.270411: step: 104/77, loss: 0.017444532364606857 2023-01-22 09:40:37.705976: step: 108/77, loss: 0.017834315076470375 2023-01-22 09:40:39.244391: step: 112/77, loss: 0.005123637616634369 2023-01-22 09:40:40.668325: step: 116/77, loss: 0.0101096136495471 2023-01-22 09:40:42.175812: step: 120/77, loss: 0.024699628353118896 2023-01-22 09:40:43.680349: step: 124/77, loss: 0.034847695380449295 2023-01-22 09:40:45.168768: step: 128/77, loss: 0.005995428655296564 2023-01-22 09:40:46.615665: step: 132/77, loss: 0.04109932854771614 2023-01-22 09:40:48.064937: step: 136/77, loss: 0.0018515828996896744 2023-01-22 09:40:49.527229: step: 140/77, loss: 0.00424737436696887 2023-01-22 09:40:51.011937: step: 144/77, loss: 0.02598082274198532 2023-01-22 09:40:52.485962: step: 148/77, loss: 0.009949353523552418 2023-01-22 09:40:53.974566: step: 152/77, loss: 0.05457217991352081 2023-01-22 09:40:55.446579: step: 156/77, loss: 0.04540511220693588 2023-01-22 09:40:56.958044: step: 160/77, loss: 0.03349992260336876 2023-01-22 09:40:58.445172: step: 164/77, loss: 0.004050153307616711 2023-01-22 09:40:59.966706: step: 168/77, loss: 0.0016534986207261682 2023-01-22 09:41:01.397230: step: 172/77, loss: 0.006821885704994202 2023-01-22 09:41:02.852847: step: 176/77, loss: 0.012208542786538601 2023-01-22 09:41:04.254450: step: 180/77, loss: 0.03986116126179695 2023-01-22 09:41:05.739203: step: 184/77, loss: 0.03070826269686222 2023-01-22 09:41:07.133455: step: 188/77, loss: 0.010352972894906998 2023-01-22 09:41:08.557306: step: 192/77, loss: 0.005710075609385967 2023-01-22 09:41:10.041738: step: 196/77, loss: 0.007689544465392828 2023-01-22 09:41:11.484844: step: 200/77, loss: 0.022643158212304115 2023-01-22 09:41:13.031671: step: 204/77, loss: 0.002715296810492873 2023-01-22 09:41:14.494447: step: 208/77, loss: 0.0020200214348733425 2023-01-22 09:41:16.028623: step: 212/77, loss: 0.008939304389059544 2023-01-22 09:41:17.524654: step: 216/77, loss: 0.0013139714719727635 2023-01-22 09:41:18.935799: step: 220/77, loss: 0.00673355907201767 2023-01-22 09:41:20.362452: step: 224/77, loss: 0.018460728228092194 2023-01-22 09:41:21.926412: step: 228/77, loss: 0.05799287185072899 2023-01-22 09:41:23.352478: step: 232/77, loss: 0.011782975867390633 2023-01-22 09:41:24.813465: step: 236/77, loss: 0.008568809367716312 2023-01-22 09:41:26.278376: step: 240/77, loss: 0.0007917608018033206 2023-01-22 09:41:27.742112: step: 244/77, loss: 0.029287803918123245 2023-01-22 09:41:29.251997: step: 248/77, loss: 0.03426426649093628 2023-01-22 09:41:30.779913: step: 252/77, loss: 0.047590676695108414 2023-01-22 09:41:32.263842: step: 256/77, loss: 0.35922297835350037 2023-01-22 09:41:33.700687: step: 260/77, loss: 0.009143169037997723 2023-01-22 09:41:35.093335: step: 264/77, loss: 0.0009675707551650703 2023-01-22 09:41:36.572686: step: 268/77, loss: 0.011927079409360886 2023-01-22 09:41:38.087179: 
step: 272/77, loss: 0.015484156087040901 2023-01-22 09:41:39.521800: step: 276/77, loss: 0.0034936340525746346 2023-01-22 09:41:41.000279: step: 280/77, loss: 0.006303347647190094 2023-01-22 09:41:42.502249: step: 284/77, loss: 0.017508573830127716 2023-01-22 09:41:43.943254: step: 288/77, loss: 0.0041512432508170605 2023-01-22 09:41:45.404262: step: 292/77, loss: 0.011202400550246239 2023-01-22 09:41:46.866664: step: 296/77, loss: 0.04729530215263367 2023-01-22 09:41:48.323473: step: 300/77, loss: 0.054292239248752594 2023-01-22 09:41:49.814645: step: 304/77, loss: 0.03898230940103531 2023-01-22 09:41:51.312513: step: 308/77, loss: 0.016453411430120468 2023-01-22 09:41:52.800939: step: 312/77, loss: 0.0026123595889657736 2023-01-22 09:41:54.276426: step: 316/77, loss: 0.04049624130129814 2023-01-22 09:41:55.789946: step: 320/77, loss: 0.015780622139573097 2023-01-22 09:41:57.264696: step: 324/77, loss: 0.057226769626140594 2023-01-22 09:41:58.698791: step: 328/77, loss: 0.19466768205165863 2023-01-22 09:42:00.121796: step: 332/77, loss: 0.0014788589905947447 2023-01-22 09:42:01.647486: step: 336/77, loss: 0.00024396379012614489 2023-01-22 09:42:03.145326: step: 340/77, loss: 0.038548793643713 2023-01-22 09:42:04.632110: step: 344/77, loss: 0.012706398032605648 2023-01-22 09:42:06.118481: step: 348/77, loss: 0.010326993651688099 2023-01-22 09:42:07.612696: step: 352/77, loss: 0.01408340409398079 2023-01-22 09:42:09.163302: step: 356/77, loss: 0.0048990207724273205 2023-01-22 09:42:10.639673: step: 360/77, loss: 0.007280079182237387 2023-01-22 09:42:12.098197: step: 364/77, loss: 0.05593865364789963 2023-01-22 09:42:13.573629: step: 368/77, loss: 0.007230781018733978 2023-01-22 09:42:15.072872: step: 372/77, loss: 0.05424201488494873 2023-01-22 09:42:16.500614: step: 376/77, loss: 0.01386354211717844 2023-01-22 09:42:17.991925: step: 380/77, loss: 0.003996170591562986 2023-01-22 09:42:19.465062: step: 384/77, loss: 0.0027065842878073454 2023-01-22 09:42:20.919679: step: 388/77, loss: 0.00222976878285408 ================================================== Loss: 0.027 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 6} Test Chinese: {'template': {'p': 0.9333333333333333, 'r': 0.546875, 'f1': 0.689655172413793}, 'slot': {'p': 0.5, 'r': 0.009813542688910697, 'f1': 0.0192492781520693}, 'combined': 0.013275364242806412, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 6} Test Korean: {'template': {'p': 0.9324324324324325, 'r': 0.5390625, 'f1': 0.6831683168316832}, 'slot': {'p': 0.5, 'r': 0.009813542688910697, 'f1': 0.0192492781520693}, 'combined': 0.013150496955374078, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 6} Test Russian: {'template': {'p': 0.9333333333333333, 'r': 0.546875, 'f1': 0.689655172413793}, 'slot': {'p': 0.5, 'r': 0.009813542688910697, 'f1': 0.0192492781520693}, 'combined': 0.013275364242806412, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 
0.03603603603603603, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 6} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 7 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:44:06.346114: step: 4/77, loss: 0.13170115649700165 2023-01-22 09:44:07.797244: step: 8/77, loss: 0.08570437878370285 2023-01-22 09:44:09.269145: step: 12/77, loss: 0.06896840035915375 2023-01-22 09:44:10.786672: step: 16/77, loss: 0.028167221695184708 2023-01-22 09:44:12.331100: step: 20/77, loss: 0.02266467735171318 2023-01-22 09:44:13.796633: step: 24/77, loss: 0.0014462533872574568 2023-01-22 09:44:15.262465: step: 28/77, loss: 0.09091892093420029 2023-01-22 09:44:16.739501: step: 32/77, loss: 0.0003704461269080639 2023-01-22 09:44:18.219068: step: 36/77, loss: 0.0035481564700603485 2023-01-22 09:44:19.701000: step: 40/77, loss: 0.03377827629446983 2023-01-22 09:44:21.149715: step: 44/77, loss: 0.011628975160419941 2023-01-22 09:44:22.656483: step: 48/77, loss: 0.0012261226074770093 2023-01-22 09:44:24.168629: step: 52/77, loss: 0.008258574642241001 2023-01-22 09:44:25.630895: step: 56/77, loss: 0.014765463769435883 
2023-01-22 09:44:27.117326: step: 60/77, loss: 0.012880018912255764 2023-01-22 09:44:28.560592: step: 64/77, loss: 0.0017860515508800745 2023-01-22 09:44:30.025942: step: 68/77, loss: 0.010468395426869392 2023-01-22 09:44:31.446840: step: 72/77, loss: 0.03757264092564583 2023-01-22 09:44:32.906914: step: 76/77, loss: 0.018443763256072998 2023-01-22 09:44:34.402116: step: 80/77, loss: 0.005401423200964928 2023-01-22 09:44:35.943411: step: 84/77, loss: 0.048929158598184586 2023-01-22 09:44:37.475240: step: 88/77, loss: 0.11744247376918793 2023-01-22 09:44:38.895194: step: 92/77, loss: 0.00185915338806808 2023-01-22 09:44:40.354352: step: 96/77, loss: 0.008274192921817303 2023-01-22 09:44:41.844074: step: 100/77, loss: 0.009979705326259136 2023-01-22 09:44:43.314791: step: 104/77, loss: 0.0325552262365818 2023-01-22 09:44:44.793867: step: 108/77, loss: 0.0005193643737584352 2023-01-22 09:44:46.290432: step: 112/77, loss: 0.026102589443325996 2023-01-22 09:44:47.807077: step: 116/77, loss: 0.004790120758116245 2023-01-22 09:44:49.346807: step: 120/77, loss: 0.020720547065138817 2023-01-22 09:44:50.814829: step: 124/77, loss: 0.013181292451918125 2023-01-22 09:44:52.231052: step: 128/77, loss: 0.15328733623027802 2023-01-22 09:44:53.748632: step: 132/77, loss: 0.006637411192059517 2023-01-22 09:44:55.234544: step: 136/77, loss: 0.026826925575733185 2023-01-22 09:44:56.642591: step: 140/77, loss: 0.002037283033132553 2023-01-22 09:44:58.115346: step: 144/77, loss: 0.002992324996739626 2023-01-22 09:44:59.556430: step: 148/77, loss: 0.015959719195961952 2023-01-22 09:45:01.099259: step: 152/77, loss: 0.012633001431822777 2023-01-22 09:45:02.531908: step: 156/77, loss: 0.002229101490229368 2023-01-22 09:45:03.910324: step: 160/77, loss: 0.005678039044141769 2023-01-22 09:45:05.323729: step: 164/77, loss: 0.07291337847709656 2023-01-22 09:45:06.766165: step: 168/77, loss: 0.06404541432857513 2023-01-22 09:45:08.214906: step: 172/77, loss: 0.04879661649465561 2023-01-22 09:45:09.743150: step: 176/77, loss: 0.004770440515130758 2023-01-22 09:45:11.140999: step: 180/77, loss: 0.07261113822460175 2023-01-22 09:45:12.602551: step: 184/77, loss: 0.09107305854558945 2023-01-22 09:45:14.071564: step: 188/77, loss: 0.03481244295835495 2023-01-22 09:45:15.512806: step: 192/77, loss: 0.026612192392349243 2023-01-22 09:45:17.057305: step: 196/77, loss: 0.013187481090426445 2023-01-22 09:45:18.453257: step: 200/77, loss: 0.007433123886585236 2023-01-22 09:45:19.893774: step: 204/77, loss: 0.014218202792108059 2023-01-22 09:45:21.312949: step: 208/77, loss: 0.023550231009721756 2023-01-22 09:45:22.826762: step: 212/77, loss: 0.06618601828813553 2023-01-22 09:45:24.283494: step: 216/77, loss: 8.782022632658482e-05 2023-01-22 09:45:25.753326: step: 220/77, loss: 0.19314192235469818 2023-01-22 09:45:27.221238: step: 224/77, loss: 0.006154694594442844 2023-01-22 09:45:28.700303: step: 228/77, loss: 0.04159718006849289 2023-01-22 09:45:30.189568: step: 232/77, loss: 0.051758646965026855 2023-01-22 09:45:31.695646: step: 236/77, loss: 0.045750297605991364 2023-01-22 09:45:33.172346: step: 240/77, loss: 0.02127264067530632 2023-01-22 09:45:34.667317: step: 244/77, loss: 0.01488424651324749 2023-01-22 09:45:36.145615: step: 248/77, loss: 0.039323072880506516 2023-01-22 09:45:37.656185: step: 252/77, loss: 0.038724955171346664 2023-01-22 09:45:39.102769: step: 256/77, loss: 0.016217941418290138 2023-01-22 09:45:40.571936: step: 260/77, loss: 0.00018854241352528334 2023-01-22 09:45:41.994441: step: 264/77, loss: 
0.029617827385663986 2023-01-22 09:45:43.495872: step: 268/77, loss: 0.014795455150306225 2023-01-22 09:45:44.935257: step: 272/77, loss: 0.027942290529608727 2023-01-22 09:45:46.378331: step: 276/77, loss: 0.03196156397461891 2023-01-22 09:45:47.834120: step: 280/77, loss: 0.018221847712993622 2023-01-22 09:45:49.358750: step: 284/77, loss: 0.007844426669180393 2023-01-22 09:45:50.836610: step: 288/77, loss: 0.025259993970394135 2023-01-22 09:45:52.263270: step: 292/77, loss: 0.0016258007381111383 2023-01-22 09:45:53.695456: step: 296/77, loss: 0.018152914941310883 2023-01-22 09:45:55.186531: step: 300/77, loss: 0.011314081028103828 2023-01-22 09:45:56.654553: step: 304/77, loss: 0.04237208515405655 2023-01-22 09:45:58.135678: step: 308/77, loss: 0.011828781105577946 2023-01-22 09:45:59.576620: step: 312/77, loss: 0.00927747879177332 2023-01-22 09:46:01.096529: step: 316/77, loss: 0.02505832351744175 2023-01-22 09:46:02.579582: step: 320/77, loss: 0.004240931943058968 2023-01-22 09:46:04.058368: step: 324/77, loss: 0.0011124876327812672 2023-01-22 09:46:05.532579: step: 328/77, loss: 0.003472943790256977 2023-01-22 09:46:06.981790: step: 332/77, loss: 0.021344980224967003 2023-01-22 09:46:08.427932: step: 336/77, loss: 0.054589562118053436 2023-01-22 09:46:09.900963: step: 340/77, loss: 0.028044283390045166 2023-01-22 09:46:11.369013: step: 344/77, loss: 0.013498909771442413 2023-01-22 09:46:12.807634: step: 348/77, loss: 0.003228034358471632 2023-01-22 09:46:14.231970: step: 352/77, loss: 0.000316204852424562 2023-01-22 09:46:15.724217: step: 356/77, loss: 0.009995604865252972 2023-01-22 09:46:17.175423: step: 360/77, loss: 0.018175769597291946 2023-01-22 09:46:18.691270: step: 364/77, loss: 0.0027893283404409885 2023-01-22 09:46:20.121984: step: 368/77, loss: 0.0030180648900568485 2023-01-22 09:46:21.500672: step: 372/77, loss: 0.015148220583796501 2023-01-22 09:46:22.949681: step: 376/77, loss: 0.00036407759762369096 2023-01-22 09:46:24.412914: step: 380/77, loss: 0.03946799784898758 2023-01-22 09:46:25.864873: step: 384/77, loss: 0.0014542130520567298 2023-01-22 09:46:27.354663: step: 388/77, loss: 0.0005557957338169217 ================================================== Loss: 0.027 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Chinese: {'template': {'p': 0.948051948051948, 'r': 0.5703125, 'f1': 0.7121951219512196}, 'slot': {'p': 0.6, 'r': 0.017664376840039256, 'f1': 0.034318398474737846}, 'combined': 0.02444139598688647, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Korean: {'template': {'p': 0.9473684210526315, 'r': 0.5625, 'f1': 0.7058823529411765}, 'slot': {'p': 0.6, 'r': 0.017664376840039256, 'f1': 0.034318398474737846}, 'combined': 0.024224751864520833, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Russian: {'template': {'p': 0.948051948051948, 'r': 0.5703125, 'f1': 0.7121951219512196}, 'slot': {'p': 0.6, 'r': 0.017664376840039256, 'f1': 0.034318398474737846}, 'combined': 0.02444139598688647, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 
0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 8 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:48:12.976866: step: 4/77, loss: 0.018476076424121857 2023-01-22 09:48:14.494837: step: 8/77, loss: 0.03965684399008751 2023-01-22 09:48:16.021247: step: 12/77, loss: 0.021786324679851532 2023-01-22 09:48:17.490993: step: 16/77, loss: 0.020817160606384277 2023-01-22 09:48:18.881238: step: 20/77, loss: 0.0024067708291113377 2023-01-22 09:48:20.351668: step: 24/77, loss: 0.004701969679445028 2023-01-22 09:48:21.850767: step: 28/77, loss: 0.0046493480913341045 2023-01-22 09:48:23.342948: step: 32/77, loss: 0.02824241667985916 2023-01-22 09:48:24.738019: step: 36/77, loss: 0.016335386782884598 2023-01-22 09:48:26.256042: step: 40/77, loss: 0.020319268107414246 2023-01-22 09:48:27.669118: step: 44/77, loss: 0.016872696578502655 2023-01-22 09:48:29.126049: step: 48/77, loss: 0.0011987756006419659 2023-01-22 
09:48:30.618822: step: 52/77, loss: 0.00891688372939825 2023-01-22 09:48:32.083301: step: 56/77, loss: 0.009940674528479576 2023-01-22 09:48:33.492514: step: 60/77, loss: 0.009649500250816345 2023-01-22 09:48:34.903110: step: 64/77, loss: 0.005740232300013304 2023-01-22 09:48:36.319103: step: 68/77, loss: 0.04176686704158783 2023-01-22 09:48:37.856176: step: 72/77, loss: 0.0452922098338604 2023-01-22 09:48:39.286395: step: 76/77, loss: 0.06259751319885254 2023-01-22 09:48:40.785718: step: 80/77, loss: 0.011904904618859291 2023-01-22 09:48:42.234922: step: 84/77, loss: 0.019501540809869766 2023-01-22 09:48:43.691725: step: 88/77, loss: 0.007173976395279169 2023-01-22 09:48:45.189852: step: 92/77, loss: 0.003331177169457078 2023-01-22 09:48:46.662049: step: 96/77, loss: 0.004057619255036116 2023-01-22 09:48:48.107073: step: 100/77, loss: 0.02370486967265606 2023-01-22 09:48:49.527146: step: 104/77, loss: 0.005920400843024254 2023-01-22 09:48:50.983672: step: 108/77, loss: 0.04308168590068817 2023-01-22 09:48:52.479970: step: 112/77, loss: 0.012136734090745449 2023-01-22 09:48:53.996706: step: 116/77, loss: 0.003824051935225725 2023-01-22 09:48:55.407630: step: 120/77, loss: 0.012575005181133747 2023-01-22 09:48:56.879540: step: 124/77, loss: 0.0017235910054296255 2023-01-22 09:48:58.288431: step: 128/77, loss: 0.021024808287620544 2023-01-22 09:48:59.796224: step: 132/77, loss: 0.018988901749253273 2023-01-22 09:49:01.258536: step: 136/77, loss: 0.014004884287714958 2023-01-22 09:49:02.677246: step: 140/77, loss: 0.02050507254898548 2023-01-22 09:49:04.116439: step: 144/77, loss: 0.0723520815372467 2023-01-22 09:49:05.539175: step: 148/77, loss: 0.0015499040018767118 2023-01-22 09:49:06.990062: step: 152/77, loss: 0.08790557086467743 2023-01-22 09:49:08.424712: step: 156/77, loss: 0.0010261915158480406 2023-01-22 09:49:09.886772: step: 160/77, loss: 0.003959516994655132 2023-01-22 09:49:11.318587: step: 164/77, loss: 0.05613886937499046 2023-01-22 09:49:12.804584: step: 168/77, loss: 0.014303348958492279 2023-01-22 09:49:14.252683: step: 172/77, loss: 0.004931567702442408 2023-01-22 09:49:15.707171: step: 176/77, loss: 0.057635094970464706 2023-01-22 09:49:17.177386: step: 180/77, loss: 0.0050613465718925 2023-01-22 09:49:18.636294: step: 184/77, loss: 0.0029486017301678658 2023-01-22 09:49:20.093720: step: 188/77, loss: 0.0020928424783051014 2023-01-22 09:49:21.546926: step: 192/77, loss: 0.006583016831427813 2023-01-22 09:49:23.048419: step: 196/77, loss: 0.016957614570856094 2023-01-22 09:49:24.515001: step: 200/77, loss: 0.0015637626638635993 2023-01-22 09:49:25.977387: step: 204/77, loss: 0.025140559300780296 2023-01-22 09:49:27.406160: step: 208/77, loss: 0.091184601187706 2023-01-22 09:49:28.820120: step: 212/77, loss: 0.009486174210906029 2023-01-22 09:49:30.225315: step: 216/77, loss: 0.004166465252637863 2023-01-22 09:49:31.699409: step: 220/77, loss: 0.011669320985674858 2023-01-22 09:49:33.186833: step: 224/77, loss: 0.001169293187558651 2023-01-22 09:49:34.578825: step: 228/77, loss: 0.012834685854613781 2023-01-22 09:49:36.032586: step: 232/77, loss: 0.010140188038349152 2023-01-22 09:49:37.466538: step: 236/77, loss: 0.015939615666866302 2023-01-22 09:49:38.941943: step: 240/77, loss: 0.02318131923675537 2023-01-22 09:49:40.354214: step: 244/77, loss: 0.013882097788155079 2023-01-22 09:49:41.810313: step: 248/77, loss: 0.01791475899517536 2023-01-22 09:49:43.358453: step: 252/77, loss: 0.012156199663877487 2023-01-22 09:49:44.841117: step: 256/77, loss: 0.015079764649271965 
2023-01-22 09:49:46.344228: step: 260/77, loss: 0.0012284107506275177 2023-01-22 09:49:47.805356: step: 264/77, loss: 0.018296556547284126 2023-01-22 09:49:49.326226: step: 268/77, loss: 0.013591526076197624 2023-01-22 09:49:50.744879: step: 272/77, loss: 0.07734925299882889 2023-01-22 09:49:52.241260: step: 276/77, loss: 0.06120207533240318 2023-01-22 09:49:53.714729: step: 280/77, loss: 0.012348676100373268 2023-01-22 09:49:55.119820: step: 284/77, loss: 0.0011046245926991105 2023-01-22 09:49:56.568959: step: 288/77, loss: 0.008927395567297935 2023-01-22 09:49:58.054964: step: 292/77, loss: 0.000859726220369339 2023-01-22 09:49:59.565025: step: 296/77, loss: 0.03434096649289131 2023-01-22 09:50:01.045636: step: 300/77, loss: 0.046841878443956375 2023-01-22 09:50:02.471998: step: 304/77, loss: 0.03707262501120567 2023-01-22 09:50:03.960687: step: 308/77, loss: 0.05187338963150978 2023-01-22 09:50:05.466716: step: 312/77, loss: 0.03167300298810005 2023-01-22 09:50:06.922425: step: 316/77, loss: 0.018084578216075897 2023-01-22 09:50:08.424338: step: 320/77, loss: 0.040085483342409134 2023-01-22 09:50:09.868539: step: 324/77, loss: 0.011672168038785458 2023-01-22 09:50:11.299505: step: 328/77, loss: 0.020293209701776505 2023-01-22 09:50:12.803879: step: 332/77, loss: 0.007086321711540222 2023-01-22 09:50:14.266741: step: 336/77, loss: 0.07502918690443039 2023-01-22 09:50:15.735794: step: 340/77, loss: 0.00554603012278676 2023-01-22 09:50:17.186134: step: 344/77, loss: 0.00422379607334733 2023-01-22 09:50:18.609540: step: 348/77, loss: 0.01804770715534687 2023-01-22 09:50:20.028384: step: 352/77, loss: 0.009498151950538158 2023-01-22 09:50:21.475768: step: 356/77, loss: 0.006640831008553505 2023-01-22 09:50:22.938808: step: 360/77, loss: 0.011323105543851852 2023-01-22 09:50:24.337589: step: 364/77, loss: 0.0036079012788832188 2023-01-22 09:50:25.795742: step: 368/77, loss: 0.018234066665172577 2023-01-22 09:50:27.248102: step: 372/77, loss: 0.0013295363169163465 2023-01-22 09:50:28.632241: step: 376/77, loss: 0.02715039998292923 2023-01-22 09:50:30.149469: step: 380/77, loss: 0.005858607590198517 2023-01-22 09:50:31.676649: step: 384/77, loss: 0.004114366136491299 2023-01-22 09:50:33.160419: step: 388/77, loss: 0.034678973257541656 ================================================== Loss: 0.020 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Chinese: {'template': {'p': 0.9367088607594937, 'r': 0.578125, 'f1': 0.7149758454106279}, 'slot': {'p': 0.5806451612903226, 'r': 0.017664376840039256, 'f1': 0.03428571428571429}, 'combined': 0.024513457556935812, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Korean: {'template': {'p': 0.9358974358974359, 'r': 0.5703125, 'f1': 0.7087378640776699}, 'slot': {'p': 0.5333333333333333, 'r': 0.015701668302257114, 'f1': 0.030505243088655855}, 'combined': 0.021620220829824052, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Russian: {'template': {'p': 0.9358974358974359, 'r': 0.5703125, 'f1': 0.7087378640776699}, 'slot': {'p': 
0.5333333333333333, 'r': 0.015701668302257114, 'f1': 0.030505243088655855}, 'combined': 0.021620220829824052, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 9 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:52:19.142363: step: 4/77, loss: 0.06111355870962143 2023-01-22 09:52:20.568121: step: 8/77, loss: 0.03258708119392395 2023-01-22 09:52:22.081862: step: 12/77, loss: 0.007520253770053387 2023-01-22 09:52:23.551060: step: 16/77, loss: 0.0033959210850298405 2023-01-22 09:52:24.987078: step: 20/77, loss: 0.018976226449012756 2023-01-22 09:52:26.434722: step: 24/77, loss: 0.022655602544546127 2023-01-22 09:52:27.932383: step: 28/77, loss: 0.0007378259324468672 2023-01-22 09:52:29.370165: step: 32/77, loss: 0.023256225511431694 2023-01-22 09:52:30.811394: step: 36/77, loss: 0.03412342816591263 2023-01-22 09:52:32.332293: step: 40/77, loss: 
0.011993163265287876 2023-01-22 09:52:33.852487: step: 44/77, loss: 0.005109499208629131 2023-01-22 09:52:35.280591: step: 48/77, loss: 0.00035834929440170527 2023-01-22 09:52:36.709033: step: 52/77, loss: 0.3323846757411957 2023-01-22 09:52:38.131352: step: 56/77, loss: 0.00893863383680582 2023-01-22 09:52:39.607373: step: 60/77, loss: 0.0043699974194169044 2023-01-22 09:52:41.044795: step: 64/77, loss: 0.022914201021194458 2023-01-22 09:52:42.507594: step: 68/77, loss: 0.0014546663733199239 2023-01-22 09:52:44.000207: step: 72/77, loss: 0.004197940230369568 2023-01-22 09:52:45.439034: step: 76/77, loss: 0.012708312831819057 2023-01-22 09:52:46.885813: step: 80/77, loss: 0.0046439156867563725 2023-01-22 09:52:48.396035: step: 84/77, loss: 0.007488596718758345 2023-01-22 09:52:49.839454: step: 88/77, loss: 0.00971380528062582 2023-01-22 09:52:51.304265: step: 92/77, loss: 0.00930837169289589 2023-01-22 09:52:52.786488: step: 96/77, loss: 0.020319797098636627 2023-01-22 09:52:54.276011: step: 100/77, loss: 0.005104791838675737 2023-01-22 09:52:55.737930: step: 104/77, loss: 0.08812321722507477 2023-01-22 09:52:57.301065: step: 108/77, loss: 0.0001489566930104047 2023-01-22 09:52:58.763590: step: 112/77, loss: 0.001770321512594819 2023-01-22 09:53:00.221731: step: 116/77, loss: 0.0025908551178872585 2023-01-22 09:53:01.660350: step: 120/77, loss: 0.000276944920187816 2023-01-22 09:53:03.159281: step: 124/77, loss: 0.0008326682145707309 2023-01-22 09:53:04.576546: step: 128/77, loss: 0.003297338727861643 2023-01-22 09:53:06.018191: step: 132/77, loss: 0.006724027916789055 2023-01-22 09:53:07.462874: step: 136/77, loss: 8.205480116885155e-05 2023-01-22 09:53:08.859623: step: 140/77, loss: 0.09621748328208923 2023-01-22 09:53:10.274986: step: 144/77, loss: 0.0013389689847826958 2023-01-22 09:53:11.758935: step: 148/77, loss: 0.0012504963669925928 2023-01-22 09:53:13.264047: step: 152/77, loss: 8.572525257477537e-05 2023-01-22 09:53:14.788259: step: 156/77, loss: 0.056684426963329315 2023-01-22 09:53:16.254539: step: 160/77, loss: 0.044627901166677475 2023-01-22 09:53:17.725285: step: 164/77, loss: 0.005165470764040947 2023-01-22 09:53:19.177118: step: 168/77, loss: 0.07160969823598862 2023-01-22 09:53:20.587260: step: 172/77, loss: 0.021577483043074608 2023-01-22 09:53:22.022190: step: 176/77, loss: 0.04253820329904556 2023-01-22 09:53:23.446571: step: 180/77, loss: 0.004028831608593464 2023-01-22 09:53:24.861611: step: 184/77, loss: 0.00423656078055501 2023-01-22 09:53:26.294317: step: 188/77, loss: 0.00010493271111045033 2023-01-22 09:53:27.753277: step: 192/77, loss: 0.06777942180633545 2023-01-22 09:53:29.263773: step: 196/77, loss: 0.06577803939580917 2023-01-22 09:53:30.753867: step: 200/77, loss: 0.019269591197371483 2023-01-22 09:53:32.218178: step: 204/77, loss: 0.0390661284327507 2023-01-22 09:53:33.645501: step: 208/77, loss: 0.013898050412535667 2023-01-22 09:53:35.129579: step: 212/77, loss: 0.028204157948493958 2023-01-22 09:53:36.622909: step: 216/77, loss: 0.005166948307305574 2023-01-22 09:53:38.208684: step: 220/77, loss: 0.014332697726786137 2023-01-22 09:53:39.679673: step: 224/77, loss: 0.05497150868177414 2023-01-22 09:53:41.092999: step: 228/77, loss: 0.012002397328615189 2023-01-22 09:53:42.562014: step: 232/77, loss: 0.011243941262364388 2023-01-22 09:53:44.031677: step: 236/77, loss: 0.001498940633609891 2023-01-22 09:53:45.573737: step: 240/77, loss: 0.02745002880692482 2023-01-22 09:53:46.980639: step: 244/77, loss: 0.023813052102923393 2023-01-22 09:53:48.502932: 
step: 248/77, loss: 0.039781540632247925 2023-01-22 09:53:49.941982: step: 252/77, loss: 0.19215527176856995 2023-01-22 09:53:51.376452: step: 256/77, loss: 0.005475187674164772 2023-01-22 09:53:52.858207: step: 260/77, loss: 0.0012005360331386328 2023-01-22 09:53:54.298976: step: 264/77, loss: 0.0016509962733834982 2023-01-22 09:53:55.803369: step: 268/77, loss: 0.0016201656544581056 2023-01-22 09:53:57.302720: step: 272/77, loss: 0.023119984194636345 2023-01-22 09:53:58.776306: step: 276/77, loss: 0.010675408877432346 2023-01-22 09:54:00.321162: step: 280/77, loss: 0.0067598833702504635 2023-01-22 09:54:01.773063: step: 284/77, loss: 0.019721917808055878 2023-01-22 09:54:03.242620: step: 288/77, loss: 0.017890213057398796 2023-01-22 09:54:04.750268: step: 292/77, loss: 0.01658380590379238 2023-01-22 09:54:06.218577: step: 296/77, loss: 0.0016131179872900248 2023-01-22 09:54:07.647290: step: 300/77, loss: 0.014928465709090233 2023-01-22 09:54:09.110949: step: 304/77, loss: 0.003837120020762086 2023-01-22 09:54:10.542189: step: 308/77, loss: 0.011906763538718224 2023-01-22 09:54:11.978471: step: 312/77, loss: 0.003964369650930166 2023-01-22 09:54:13.488533: step: 316/77, loss: 0.012319169007241726 2023-01-22 09:54:15.044043: step: 320/77, loss: 0.01478549838066101 2023-01-22 09:54:16.571822: step: 324/77, loss: 0.06367487460374832 2023-01-22 09:54:18.011703: step: 328/77, loss: 0.004723585210740566 2023-01-22 09:54:19.507405: step: 332/77, loss: 0.01060541719198227 2023-01-22 09:54:20.989903: step: 336/77, loss: 0.09059099853038788 2023-01-22 09:54:22.431474: step: 340/77, loss: 0.025290492922067642 2023-01-22 09:54:23.916863: step: 344/77, loss: 0.008649226278066635 2023-01-22 09:54:25.403327: step: 348/77, loss: 0.02813754975795746 2023-01-22 09:54:26.852807: step: 352/77, loss: 0.05314696580171585 2023-01-22 09:54:28.344681: step: 356/77, loss: 0.024964183568954468 2023-01-22 09:54:29.838344: step: 360/77, loss: 0.04871615767478943 2023-01-22 09:54:31.260281: step: 364/77, loss: 0.0009216809994541109 2023-01-22 09:54:32.664375: step: 368/77, loss: 0.03678221255540848 2023-01-22 09:54:34.086411: step: 372/77, loss: 0.00028942187782377005 2023-01-22 09:54:35.541944: step: 376/77, loss: 0.06943628937005997 2023-01-22 09:54:36.960549: step: 380/77, loss: 0.010281715542078018 2023-01-22 09:54:38.348811: step: 384/77, loss: 0.022855132818222046 2023-01-22 09:54:39.804187: step: 388/77, loss: 0.0057218074798583984 ================================================== Loss: 0.025 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Chinese: {'template': {'p': 0.9066666666666666, 'r': 0.53125, 'f1': 0.6699507389162561}, 'slot': {'p': 0.4878048780487805, 'r': 0.019627085377821395, 'f1': 0.03773584905660377}, 'combined': 0.025281159959104002, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Korean: {'template': {'p': 0.9066666666666666, 'r': 0.53125, 'f1': 0.6699507389162561}, 'slot': {'p': 0.4878048780487805, 'r': 0.019627085377821395, 'f1': 0.03773584905660377}, 'combined': 0.025281159959104002, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 
0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Russian: {'template': {'p': 0.9066666666666666, 'r': 0.53125, 'f1': 0.6699507389162561}, 'slot': {'p': 0.4878048780487805, 'r': 0.019627085377821395, 'f1': 0.03773584905660377}, 'combined': 0.025281159959104002, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 10 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:56:21.720281: step: 4/77, loss: 0.004413371905684471 2023-01-22 09:56:23.089436: step: 8/77, loss: 0.011603401042521 2023-01-22 09:56:24.444252: step: 12/77, loss: 0.0070768119767308235 2023-01-22 09:56:25.823224: step: 16/77, loss: 0.011790785938501358 2023-01-22 09:56:27.248884: step: 20/77, loss: 0.003310129977762699 2023-01-22 09:56:28.689023: step: 24/77, loss: 0.001037739566527307 2023-01-22 09:56:30.111302: step: 28/77, loss: 0.0685984268784523 2023-01-22 09:56:31.502290: 
step: 32/77, loss: 0.012599118985235691 2023-01-22 09:56:32.939665: step: 36/77, loss: 0.031620293855667114 2023-01-22 09:56:34.453843: step: 40/77, loss: 0.015515482984483242 2023-01-22 09:56:35.912632: step: 44/77, loss: 0.013255885802209377 2023-01-22 09:56:37.283630: step: 48/77, loss: 0.017563162371516228 2023-01-22 09:56:38.714988: step: 52/77, loss: 0.023024428635835648 2023-01-22 09:56:40.164250: step: 56/77, loss: 0.015319026075303555 2023-01-22 09:56:41.594804: step: 60/77, loss: 0.0038829308468848467 2023-01-22 09:56:43.081843: step: 64/77, loss: 0.010435502976179123 2023-01-22 09:56:44.549390: step: 68/77, loss: 0.004648303613066673 2023-01-22 09:56:46.027739: step: 72/77, loss: 0.0006056458223611116 2023-01-22 09:56:47.521803: step: 76/77, loss: 0.0007848583627492189 2023-01-22 09:56:48.968022: step: 80/77, loss: 0.0031027907971292734 2023-01-22 09:56:50.397102: step: 84/77, loss: 0.002980540506541729 2023-01-22 09:56:51.854458: step: 88/77, loss: 0.004327086266130209 2023-01-22 09:56:53.368311: step: 92/77, loss: 0.0061401608400046825 2023-01-22 09:56:54.800295: step: 96/77, loss: 0.03094206564128399 2023-01-22 09:56:56.244551: step: 100/77, loss: 0.010667397640645504 2023-01-22 09:56:57.709700: step: 104/77, loss: 0.002312043448910117 2023-01-22 09:56:59.143782: step: 108/77, loss: 0.02056185156106949 2023-01-22 09:57:00.555794: step: 112/77, loss: 0.013677126727998257 2023-01-22 09:57:01.965725: step: 116/77, loss: 0.03300682455301285 2023-01-22 09:57:03.396717: step: 120/77, loss: 0.002695606555789709 2023-01-22 09:57:04.818916: step: 124/77, loss: 0.0016732718795537949 2023-01-22 09:57:06.248057: step: 128/77, loss: 0.017262576147913933 2023-01-22 09:57:07.634801: step: 132/77, loss: 0.01876669004559517 2023-01-22 09:57:09.109594: step: 136/77, loss: 0.007911132648587227 2023-01-22 09:57:10.525598: step: 140/77, loss: 0.007990386337041855 2023-01-22 09:57:12.058894: step: 144/77, loss: 0.001423410139977932 2023-01-22 09:57:13.545054: step: 148/77, loss: 0.0002995043178088963 2023-01-22 09:57:14.979337: step: 152/77, loss: 0.0005054707289673388 2023-01-22 09:57:16.410584: step: 156/77, loss: 0.010499808937311172 2023-01-22 09:57:17.884142: step: 160/77, loss: 0.01981007121503353 2023-01-22 09:57:19.312587: step: 164/77, loss: 0.061028920114040375 2023-01-22 09:57:20.751401: step: 168/77, loss: 0.007777105551213026 2023-01-22 09:57:22.184194: step: 172/77, loss: 0.0684259682893753 2023-01-22 09:57:23.641632: step: 176/77, loss: 0.027019374072551727 2023-01-22 09:57:25.029393: step: 180/77, loss: 0.003009375650435686 2023-01-22 09:57:26.437461: step: 184/77, loss: 0.03607326000928879 2023-01-22 09:57:27.912985: step: 188/77, loss: 0.0006161195342428982 2023-01-22 09:57:29.378806: step: 192/77, loss: 0.03203636780381203 2023-01-22 09:57:30.814540: step: 196/77, loss: 0.009488014504313469 2023-01-22 09:57:32.273959: step: 200/77, loss: 0.0061500160954892635 2023-01-22 09:57:33.689869: step: 204/77, loss: 0.003809800138697028 2023-01-22 09:57:35.108938: step: 208/77, loss: 0.0006669530994258821 2023-01-22 09:57:36.594821: step: 212/77, loss: 0.09732852131128311 2023-01-22 09:57:38.043753: step: 216/77, loss: 0.023626577109098434 2023-01-22 09:57:39.554025: step: 220/77, loss: 0.036544617265462875 2023-01-22 09:57:41.030895: step: 224/77, loss: 0.009659359231591225 2023-01-22 09:57:42.469265: step: 228/77, loss: 0.0033210739493370056 2023-01-22 09:57:43.884277: step: 232/77, loss: 0.0032579938415437937 2023-01-22 09:57:45.340577: step: 236/77, loss: 0.03421283885836601 
2023-01-22 09:57:46.866471: step: 240/77, loss: 0.019285082817077637 2023-01-22 09:57:48.304324: step: 244/77, loss: 0.025123365223407745 2023-01-22 09:57:49.727780: step: 248/77, loss: 0.007166619878262281 2023-01-22 09:57:51.136793: step: 252/77, loss: 0.038742199540138245 2023-01-22 09:57:52.653453: step: 256/77, loss: 0.0009126511286012828 2023-01-22 09:57:54.135420: step: 260/77, loss: 0.020126396790146828 2023-01-22 09:57:55.605790: step: 264/77, loss: 0.02404876798391342 2023-01-22 09:57:57.087631: step: 268/77, loss: 0.006159630138427019 2023-01-22 09:57:58.548728: step: 272/77, loss: 0.010606272146105766 2023-01-22 09:57:59.908868: step: 276/77, loss: 0.04204922169446945 2023-01-22 09:58:01.362346: step: 280/77, loss: 0.007835128344595432 2023-01-22 09:58:02.806631: step: 284/77, loss: 0.017098350450396538 2023-01-22 09:58:04.342145: step: 288/77, loss: 0.011766848154366016 2023-01-22 09:58:05.722235: step: 292/77, loss: 0.000319849670631811 2023-01-22 09:58:07.210249: step: 296/77, loss: 0.07437402009963989 2023-01-22 09:58:08.630838: step: 300/77, loss: 0.01065000332891941 2023-01-22 09:58:10.153565: step: 304/77, loss: 0.0071182590909302235 2023-01-22 09:58:11.627914: step: 308/77, loss: 0.014701258391141891 2023-01-22 09:58:13.017306: step: 312/77, loss: 0.03189054876565933 2023-01-22 09:58:14.453136: step: 316/77, loss: 0.023664599284529686 2023-01-22 09:58:15.871578: step: 320/77, loss: 0.001820672769099474 2023-01-22 09:58:17.379509: step: 324/77, loss: 0.03477199375629425 2023-01-22 09:58:18.765011: step: 328/77, loss: 0.051144201308488846 2023-01-22 09:58:20.267506: step: 332/77, loss: 0.03953402116894722 2023-01-22 09:58:21.685594: step: 336/77, loss: 0.026595700532197952 2023-01-22 09:58:23.063822: step: 340/77, loss: 0.01947232335805893 2023-01-22 09:58:24.470190: step: 344/77, loss: 0.005510674323886633 2023-01-22 09:58:25.970383: step: 348/77, loss: 0.005318933166563511 2023-01-22 09:58:27.373216: step: 352/77, loss: 0.021326635032892227 2023-01-22 09:58:28.848027: step: 356/77, loss: 0.033761221915483475 2023-01-22 09:58:30.354916: step: 360/77, loss: 0.005709969904273748 2023-01-22 09:58:31.811417: step: 364/77, loss: 0.012541381642222404 2023-01-22 09:58:33.234115: step: 368/77, loss: 0.002873632125556469 2023-01-22 09:58:34.721924: step: 372/77, loss: 0.010858142748475075 2023-01-22 09:58:36.198718: step: 376/77, loss: 0.0023356713354587555 2023-01-22 09:58:37.636873: step: 380/77, loss: 0.04005616530776024 2023-01-22 09:58:39.070976: step: 384/77, loss: 0.0035838475450873375 2023-01-22 09:58:40.429693: step: 388/77, loss: 0.027848560363054276 ================================================== Loss: 0.017 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Chinese: {'template': {'p': 0.9295774647887324, 'r': 0.515625, 'f1': 0.6633165829145728}, 'slot': {'p': 0.5625, 'r': 0.017664376840039256, 'f1': 0.03425309229305423}, 'combined': 0.022720644134086223, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.515625, 'f1': 0.6633165829145728}, 'slot': {'p': 0.5625, 'r': 0.017664376840039256, 'f1': 0.03425309229305423}, 'combined': 0.022720644134086223, 'epoch': 10} 
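[Editor's aside, not part of the original log] In every Dev/Test/Sample record in this log, each 'f1' is the usual harmonic mean of the listed 'p' and 'r', and the 'combined' value equals the product of the template F1 and the slot F1 (e.g. 0.7368421 x 0.0702988 = 0.0517991 in the Dev records). The sketch below only restates that observed relationship; the helper names are hypothetical, not taken from train.py.

    def f1(p, r):
        # Standard F1 from precision and recall.
        return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

    def combined_score(template, slot):
        # In every logged record, 'combined' equals template_f1 * slot_f1.
        return template['f1'] * slot['f1']

    # Numbers taken from the epoch-10 Dev Chinese record above:
    template = {'p': 1.0, 'r': 0.5833333333333334, 'f1': f1(1.0, 0.5833333333333334)}
    slot = {'p': 0.5, 'r': 0.03780718336483932, 'f1': f1(0.5, 0.03780718336483932)}
    print(combined_score(template, slot))  # ~0.0518, matching the logged 'combined'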
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.515625, 'f1': 0.6633165829145728}, 'slot': {'p': 0.59375, 'r': 0.018645731108930325, 'f1': 0.03615604186489058}, 'combined': 0.02398290214153546, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 11 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 10:00:22.198488: step: 4/77, loss: 0.0002601227897685021 2023-01-22 10:00:23.611640: step: 8/77, loss: 0.010210440494120121 2023-01-22 10:00:25.040503: step: 12/77, loss: 0.0026236511766910553 2023-01-22 10:00:26.473506: step: 16/77, loss: 0.003634335473179817 2023-01-22 10:00:27.954496: step: 20/77, loss: 0.0036590697709470987 2023-01-22 
10:00:29.414824: step: 24/77, loss: 0.016517236828804016 2023-01-22 10:00:30.828783: step: 28/77, loss: 0.02679351158440113 2023-01-22 10:00:32.278122: step: 32/77, loss: 0.00783599354326725 2023-01-22 10:00:33.662426: step: 36/77, loss: 0.005712728947401047 2023-01-22 10:00:35.078072: step: 40/77, loss: 0.02416357956826687 2023-01-22 10:00:36.533536: step: 44/77, loss: 0.004781464114785194 2023-01-22 10:00:37.964309: step: 48/77, loss: 0.0013125156983733177 2023-01-22 10:00:39.380001: step: 52/77, loss: 0.008379247039556503 2023-01-22 10:00:40.882018: step: 56/77, loss: 0.02059927023947239 2023-01-22 10:00:42.295716: step: 60/77, loss: 0.017647510394454002 2023-01-22 10:00:43.656440: step: 64/77, loss: 0.003159626154229045 2023-01-22 10:00:45.081102: step: 68/77, loss: 0.0001272601803066209 2023-01-22 10:00:46.529509: step: 72/77, loss: 0.010219249874353409 2023-01-22 10:00:47.962295: step: 76/77, loss: 0.0011884287232533097 2023-01-22 10:00:49.369761: step: 80/77, loss: 0.010820185765624046 2023-01-22 10:00:50.784949: step: 84/77, loss: 0.03870443254709244 2023-01-22 10:00:52.262754: step: 88/77, loss: 0.00692142266780138 2023-01-22 10:00:53.728071: step: 92/77, loss: 0.009901397861540318 2023-01-22 10:00:55.158296: step: 96/77, loss: 0.0022363499738276005 2023-01-22 10:00:56.649070: step: 100/77, loss: 0.03652594983577728 2023-01-22 10:00:58.088588: step: 104/77, loss: 0.05372604727745056 2023-01-22 10:00:59.577650: step: 108/77, loss: 0.0008952724747359753 2023-01-22 10:01:01.037352: step: 112/77, loss: 0.2615503668785095 2023-01-22 10:01:02.480588: step: 116/77, loss: 0.00021023042791057378 2023-01-22 10:01:03.914006: step: 120/77, loss: 0.012398790568113327 2023-01-22 10:01:05.439519: step: 124/77, loss: 0.013235787861049175 2023-01-22 10:01:06.921078: step: 128/77, loss: 0.0010583129478618503 2023-01-22 10:01:08.350375: step: 132/77, loss: 0.08256355673074722 2023-01-22 10:01:09.791220: step: 136/77, loss: 0.05031026154756546 2023-01-22 10:01:11.273252: step: 140/77, loss: 0.015843644738197327 2023-01-22 10:01:12.718677: step: 144/77, loss: 0.07007738202810287 2023-01-22 10:01:14.203043: step: 148/77, loss: 0.0006344180437736213 2023-01-22 10:01:15.657211: step: 152/77, loss: 0.002021482679992914 2023-01-22 10:01:17.101841: step: 156/77, loss: 0.04340111464262009 2023-01-22 10:01:18.529995: step: 160/77, loss: 0.05992339551448822 2023-01-22 10:01:20.010391: step: 164/77, loss: 0.0021300525404512882 2023-01-22 10:01:21.390921: step: 168/77, loss: 0.005749665666371584 2023-01-22 10:01:22.883996: step: 172/77, loss: 0.0012523261830210686 2023-01-22 10:01:24.364923: step: 176/77, loss: 0.003278427990153432 2023-01-22 10:01:25.788786: step: 180/77, loss: 0.005797204561531544 2023-01-22 10:01:27.213545: step: 184/77, loss: 0.004608628340065479 2023-01-22 10:01:28.680152: step: 188/77, loss: 0.03436815366148949 2023-01-22 10:01:30.086722: step: 192/77, loss: 0.00024082028539851308 2023-01-22 10:01:31.548771: step: 196/77, loss: 0.06575880199670792 2023-01-22 10:01:32.932016: step: 200/77, loss: 0.008478851988911629 2023-01-22 10:01:34.405297: step: 204/77, loss: 0.00896163284778595 2023-01-22 10:01:35.878166: step: 208/77, loss: 0.008419353514909744 2023-01-22 10:01:37.287144: step: 212/77, loss: 0.006319502368569374 2023-01-22 10:01:38.688888: step: 216/77, loss: 0.05734553560614586 2023-01-22 10:01:40.101057: step: 220/77, loss: 0.015883300453424454 2023-01-22 10:01:41.551111: step: 224/77, loss: 0.05923188850283623 2023-01-22 10:01:42.948430: step: 228/77, loss: 0.006545787677168846 
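[Editor's aside, not part of the original log] The "Current best result" block keeps re-printing the epoch-2 scores after every epoch, which is consistent with best-checkpoint bookkeeping keyed on the dev 'combined' score that no later epoch in this run has exceeded. A minimal sketch of that bookkeeping under that assumption (variable and function names hypothetical):

    best_result = {}  # language -> best dev record seen so far

    def update_best(language, dev_record):
        # Keep the record with the highest dev 'combined'; otherwise retain
        # the previous best (here, the epoch-2 record for every language).
        prev = best_result.get(language)
        if prev is None or dev_record['combined'] > prev['combined']:
            best_result[language] = dev_record
        return best_result[language]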
2023-01-22 10:01:44.343466: step: 232/77, loss: 0.004563986789435148 2023-01-22 10:01:45.792919: step: 236/77, loss: 0.0011642567114904523 2023-01-22 10:01:47.158911: step: 240/77, loss: 0.009855842217803001 2023-01-22 10:01:48.557083: step: 244/77, loss: 0.0019316822290420532 2023-01-22 10:01:49.994948: step: 248/77, loss: 0.0028050937689840794 2023-01-22 10:01:51.464299: step: 252/77, loss: 0.017084931954741478 2023-01-22 10:01:52.924446: step: 256/77, loss: 0.0007316049304790795 2023-01-22 10:01:54.405918: step: 260/77, loss: 0.030099056661128998 2023-01-22 10:01:55.880703: step: 264/77, loss: 0.035914164036512375 2023-01-22 10:01:57.263594: step: 268/77, loss: 0.07876569032669067 2023-01-22 10:01:58.653801: step: 272/77, loss: 0.006000447552651167 2023-01-22 10:02:00.052514: step: 276/77, loss: 0.007713375613093376 2023-01-22 10:02:01.483817: step: 280/77, loss: 0.0012402540305629373 2023-01-22 10:02:02.957224: step: 284/77, loss: 0.0010725243482738733 2023-01-22 10:02:04.439139: step: 288/77, loss: 0.05046703666448593 2023-01-22 10:02:05.835458: step: 292/77, loss: 0.0003727408475242555 2023-01-22 10:02:07.267035: step: 296/77, loss: 0.009448207914829254 2023-01-22 10:02:08.636489: step: 300/77, loss: 0.01585238240659237 2023-01-22 10:02:10.121387: step: 304/77, loss: 0.07688168436288834 2023-01-22 10:02:11.555047: step: 308/77, loss: 0.12805522978305817 2023-01-22 10:02:12.940081: step: 312/77, loss: 0.01632690243422985 2023-01-22 10:02:14.368695: step: 316/77, loss: 0.014750231988728046 2023-01-22 10:02:15.775349: step: 320/77, loss: 0.010419289581477642 2023-01-22 10:02:17.269373: step: 324/77, loss: 0.016786780208349228 2023-01-22 10:02:18.736768: step: 328/77, loss: 0.0032613922376185656 2023-01-22 10:02:20.170285: step: 332/77, loss: 0.00038973920163698494 2023-01-22 10:02:21.540073: step: 336/77, loss: 0.03285951167345047 2023-01-22 10:02:22.962899: step: 340/77, loss: 0.030117888003587723 2023-01-22 10:02:24.416080: step: 344/77, loss: 0.00024034206580836326 2023-01-22 10:02:25.819071: step: 348/77, loss: 0.0005765100358985364 2023-01-22 10:02:27.311638: step: 352/77, loss: 0.019205616787075996 2023-01-22 10:02:28.709748: step: 356/77, loss: 0.005558905191719532 2023-01-22 10:02:30.142470: step: 360/77, loss: 0.004256248939782381 2023-01-22 10:02:31.560834: step: 364/77, loss: 0.015143109485507011 2023-01-22 10:02:32.965432: step: 368/77, loss: 0.0031705782748758793 2023-01-22 10:02:34.392374: step: 372/77, loss: 8.148000051733106e-05 2023-01-22 10:02:35.779438: step: 376/77, loss: 3.1818810384720564e-05 2023-01-22 10:02:37.179469: step: 380/77, loss: 3.5136061342200264e-05 2023-01-22 10:02:38.624999: step: 384/77, loss: 0.0008394332253374159 2023-01-22 10:02:40.017377: step: 388/77, loss: 0.058147720992565155 ================================================== Loss: 0.020 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Chinese: {'template': {'p': 0.875, 'r': 0.546875, 'f1': 0.6730769230769231}, 'slot': {'p': 0.5666666666666667, 'r': 0.016683022571148183, 'f1': 0.032411820781696854}, 'combined': 0.02181564860306519, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Korean: {'template': {'p': 0.8641975308641975, 'r': 
0.546875, 'f1': 0.6698564593301436}, 'slot': {'p': 0.5714285714285714, 'r': 0.015701668302257114, 'f1': 0.030563514804202475}, 'combined': 0.0204731678114275, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Russian: {'template': {'p': 0.8641975308641975, 'r': 0.546875, 'f1': 0.6698564593301436}, 'slot': {'p': 0.5666666666666667, 'r': 0.016683022571148183, 'f1': 0.032411820781696854}, 'combined': 0.02171126750927062, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 12 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 10:04:21.597644: step: 4/77, loss: 0.0016438113525509834 2023-01-22 10:04:23.033588: step: 8/77, loss: 0.021829890087246895 2023-01-22 10:04:24.489076: step: 12/77, 
loss: 0.004034928511828184 2023-01-22 10:04:25.955082: step: 16/77, loss: 4.006828476121882e-06 2023-01-22 10:04:27.320184: step: 20/77, loss: 0.013220297172665596 2023-01-22 10:04:28.755007: step: 24/77, loss: 0.004591222852468491 2023-01-22 10:04:30.162495: step: 28/77, loss: 0.04591258987784386 2023-01-22 10:04:31.550224: step: 32/77, loss: 0.09675614535808563 2023-01-22 10:04:32.974164: step: 36/77, loss: 0.0018667132826521993 2023-01-22 10:04:34.393134: step: 40/77, loss: 0.003975985571742058 2023-01-22 10:04:35.891949: step: 44/77, loss: 0.07758969068527222 2023-01-22 10:04:37.291986: step: 48/77, loss: 0.014692619442939758 2023-01-22 10:04:38.719105: step: 52/77, loss: 0.00668076379224658 2023-01-22 10:04:40.144656: step: 56/77, loss: 0.03895683214068413 2023-01-22 10:04:41.591997: step: 60/77, loss: 0.00016144484106916934 2023-01-22 10:04:43.041286: step: 64/77, loss: 0.009363364428281784 2023-01-22 10:04:44.426017: step: 68/77, loss: 0.01680179126560688 2023-01-22 10:04:45.804857: step: 72/77, loss: 0.037707582116127014 2023-01-22 10:04:47.243680: step: 76/77, loss: 0.0043097869493067265 2023-01-22 10:04:48.665551: step: 80/77, loss: 0.00547909876331687 2023-01-22 10:04:50.085944: step: 84/77, loss: 0.013198956847190857 2023-01-22 10:04:51.565935: step: 88/77, loss: 0.15006424486637115 2023-01-22 10:04:53.050175: step: 92/77, loss: 0.023897048085927963 2023-01-22 10:04:54.473576: step: 96/77, loss: 0.030548155307769775 2023-01-22 10:04:55.848326: step: 100/77, loss: 0.0020266990177333355 2023-01-22 10:04:57.281829: step: 104/77, loss: 0.0009935040725395083 2023-01-22 10:04:58.777281: step: 108/77, loss: 0.015393025241792202 2023-01-22 10:05:00.217849: step: 112/77, loss: 0.033583465963602066 2023-01-22 10:05:01.672775: step: 116/77, loss: 0.0640583410859108 2023-01-22 10:05:03.100383: step: 120/77, loss: 0.019880419597029686 2023-01-22 10:05:04.572280: step: 124/77, loss: 0.007873378694057465 2023-01-22 10:05:05.989502: step: 128/77, loss: 0.00377391604706645 2023-01-22 10:05:07.377897: step: 132/77, loss: 0.04253475368022919 2023-01-22 10:05:08.811770: step: 136/77, loss: 0.0010281100403517485 2023-01-22 10:05:10.273858: step: 140/77, loss: 5.9074875025544316e-05 2023-01-22 10:05:11.682727: step: 144/77, loss: 0.004570574499666691 2023-01-22 10:05:13.137819: step: 148/77, loss: 0.019134998321533203 2023-01-22 10:05:14.566347: step: 152/77, loss: 0.01529659703373909 2023-01-22 10:05:15.987983: step: 156/77, loss: 0.010097681544721127 2023-01-22 10:05:17.493395: step: 160/77, loss: 0.003892091568559408 2023-01-22 10:05:18.911638: step: 164/77, loss: 0.008095276542007923 2023-01-22 10:05:20.326320: step: 168/77, loss: 0.006381608545780182 2023-01-22 10:05:21.780256: step: 172/77, loss: 0.00017421241500414908 2023-01-22 10:05:23.279079: step: 176/77, loss: 0.010663943365216255 2023-01-22 10:05:24.698985: step: 180/77, loss: 0.002063462045043707 2023-01-22 10:05:26.120629: step: 184/77, loss: 0.0066839721985161304 2023-01-22 10:05:27.525381: step: 188/77, loss: 0.035569749772548676 2023-01-22 10:05:28.938331: step: 192/77, loss: 0.02284320630133152 2023-01-22 10:05:30.387479: step: 196/77, loss: 0.011159653775393963 2023-01-22 10:05:31.799736: step: 200/77, loss: 0.0004205276200082153 2023-01-22 10:05:33.268221: step: 204/77, loss: 0.0013540438376367092 2023-01-22 10:05:34.729799: step: 208/77, loss: 0.010324251838028431 2023-01-22 10:05:36.309687: step: 212/77, loss: 0.042814191430807114 2023-01-22 10:05:37.682328: step: 216/77, loss: 0.009010246023535728 2023-01-22 10:05:39.152315: 
step: 220/77, loss: 0.03980337828397751 2023-01-22 10:05:40.578333: step: 224/77, loss: 0.006109605543315411 2023-01-22 10:05:42.038980: step: 228/77, loss: 0.011437594890594482 2023-01-22 10:05:43.453882: step: 232/77, loss: 0.022036511451005936 2023-01-22 10:05:44.878701: step: 236/77, loss: 0.006096334662288427 2023-01-22 10:05:46.312236: step: 240/77, loss: 7.65419281378854e-06 2023-01-22 10:05:47.778402: step: 244/77, loss: 0.0005192124517634511 2023-01-22 10:05:49.140016: step: 248/77, loss: 0.012744538486003876 2023-01-22 10:05:50.647498: step: 252/77, loss: 7.806696521583945e-05 2023-01-22 10:05:52.143618: step: 256/77, loss: 0.016540756449103355 2023-01-22 10:05:53.625174: step: 260/77, loss: 0.0001507106062490493 2023-01-22 10:05:55.075080: step: 264/77, loss: 0.08338964730501175 2023-01-22 10:05:56.607303: step: 268/77, loss: 0.039350226521492004 2023-01-22 10:05:57.986922: step: 272/77, loss: 0.01819436624646187 2023-01-22 10:05:59.432128: step: 276/77, loss: 0.06712066382169724 2023-01-22 10:06:00.833753: step: 280/77, loss: 0.00011724078649422154 2023-01-22 10:06:02.295733: step: 284/77, loss: 0.0063972026109695435 2023-01-22 10:06:03.769199: step: 288/77, loss: 4.31464723078534e-05 2023-01-22 10:06:05.179370: step: 292/77, loss: 1.0793160981847905e-05 2023-01-22 10:06:06.575283: step: 296/77, loss: 0.0031588266137987375 2023-01-22 10:06:08.008544: step: 300/77, loss: 5.132694059284404e-05 2023-01-22 10:06:09.481784: step: 304/77, loss: 0.0010889836121350527 2023-01-22 10:06:10.931228: step: 308/77, loss: 0.0011067269369959831 2023-01-22 10:06:12.434861: step: 312/77, loss: 0.002232741564512253 2023-01-22 10:06:13.923764: step: 316/77, loss: 0.009018322452902794 2023-01-22 10:06:15.247511: step: 320/77, loss: 0.04016238823533058 2023-01-22 10:06:16.723445: step: 324/77, loss: 0.02760501578450203 2023-01-22 10:06:18.185585: step: 328/77, loss: 0.021644847467541695 2023-01-22 10:06:19.597765: step: 332/77, loss: 0.007411382161080837 2023-01-22 10:06:20.957167: step: 336/77, loss: 0.0015520785236731172 2023-01-22 10:06:22.432822: step: 340/77, loss: 0.0025608709547668695 2023-01-22 10:06:23.905440: step: 344/77, loss: 0.07523033022880554 2023-01-22 10:06:25.377510: step: 348/77, loss: 0.0020249553490430117 2023-01-22 10:06:26.823851: step: 352/77, loss: 0.0007536716875620186 2023-01-22 10:06:28.265742: step: 356/77, loss: 0.0054727462120354176 2023-01-22 10:06:29.721466: step: 360/77, loss: 0.010609529912471771 2023-01-22 10:06:31.153997: step: 364/77, loss: 0.0005191811360418797 2023-01-22 10:06:32.570692: step: 368/77, loss: 3.0023405997781083e-05 2023-01-22 10:06:33.980092: step: 372/77, loss: 1.870140476967208e-05 2023-01-22 10:06:35.396098: step: 376/77, loss: 0.0015816589584574103 2023-01-22 10:06:36.832258: step: 380/77, loss: 0.0009871306829154491 2023-01-22 10:06:38.284077: step: 384/77, loss: 0.013140087947249413 2023-01-22 10:06:39.721939: step: 388/77, loss: 0.05055278539657593 ================================================== Loss: 0.017 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 12} Test Chinese: {'template': {'p': 0.9315068493150684, 'r': 0.53125, 'f1': 0.6766169154228856}, 'slot': {'p': 0.5405405405405406, 'r': 0.019627085377821395, 'f1': 0.03787878787878788}, 'combined': 0.025629428614503243, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 
0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 12} Test Korean: {'template': {'p': 0.9315068493150684, 'r': 0.53125, 'f1': 0.6766169154228856}, 'slot': {'p': 0.5428571428571428, 'r': 0.018645731108930325, 'f1': 0.036053130929791274}, 'combined': 0.024394158241052805, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 12} Test Russian: {'template': {'p': 0.9305555555555556, 'r': 0.5234375, 'f1': 0.6699999999999999}, 'slot': {'p': 0.5405405405405406, 'r': 0.019627085377821395, 'f1': 0.03787878787878788}, 'combined': 0.025378787878787876, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 13 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 
--learning_rate 2e-4 2023-01-22 10:08:21.558725: step: 4/77, loss: 0.0028693326748907566 2023-01-22 10:08:22.983793: step: 8/77, loss: 0.00885781366378069 2023-01-22 10:08:24.435539: step: 12/77, loss: 0.018565163016319275 2023-01-22 10:08:25.924086: step: 16/77, loss: 0.003598729381337762 2023-01-22 10:08:27.384356: step: 20/77, loss: 0.020400619134306908 2023-01-22 10:08:28.849636: step: 24/77, loss: 0.030392616987228394 2023-01-22 10:08:30.322654: step: 28/77, loss: 0.018394771963357925 2023-01-22 10:08:31.715202: step: 32/77, loss: 0.010670867748558521 2023-01-22 10:08:33.197112: step: 36/77, loss: 0.0007476868922822177 2023-01-22 10:08:34.637646: step: 40/77, loss: 0.0008807579288259149 2023-01-22 10:08:36.047487: step: 44/77, loss: 0.001965353498235345 2023-01-22 10:08:37.511456: step: 48/77, loss: 0.006215990521013737 2023-01-22 10:08:38.969898: step: 52/77, loss: 0.0004018288746010512 2023-01-22 10:08:40.427482: step: 56/77, loss: 0.0014737469609826803 2023-01-22 10:08:41.847946: step: 60/77, loss: 0.11779823899269104 2023-01-22 10:08:43.299459: step: 64/77, loss: 0.0219552181661129 2023-01-22 10:08:44.678459: step: 68/77, loss: 0.0028365645557641983 2023-01-22 10:08:46.094327: step: 72/77, loss: 0.030482318252325058 2023-01-22 10:08:47.589891: step: 76/77, loss: 0.3921399712562561 2023-01-22 10:08:49.040802: step: 80/77, loss: 0.062012821435928345 2023-01-22 10:08:50.451313: step: 84/77, loss: 0.01332969218492508 2023-01-22 10:08:51.936775: step: 88/77, loss: 0.01111157238483429 2023-01-22 10:08:53.353113: step: 92/77, loss: 0.0018552043475210667 2023-01-22 10:08:54.844825: step: 96/77, loss: 0.014295405708253384 2023-01-22 10:08:56.191679: step: 100/77, loss: 0.06454520672559738 2023-01-22 10:08:57.607831: step: 104/77, loss: 0.0072874510660767555 2023-01-22 10:08:58.986853: step: 108/77, loss: 0.00016964730457402766 2023-01-22 10:09:00.405456: step: 112/77, loss: 0.0008886498399078846 2023-01-22 10:09:01.808265: step: 116/77, loss: 0.03283444419503212 2023-01-22 10:09:03.205876: step: 120/77, loss: 0.0010433443821966648 2023-01-22 10:09:04.565285: step: 124/77, loss: 0.009966873563826084 2023-01-22 10:09:05.994378: step: 128/77, loss: 0.0013519097119569778 2023-01-22 10:09:07.480574: step: 132/77, loss: 0.00016006355872377753 2023-01-22 10:09:08.901979: step: 136/77, loss: 0.045845817774534225 2023-01-22 10:09:10.385283: step: 140/77, loss: 0.0009190597338601947 2023-01-22 10:09:11.857410: step: 144/77, loss: 0.010922224260866642 2023-01-22 10:09:13.310341: step: 148/77, loss: 0.022966429591178894 2023-01-22 10:09:14.754378: step: 152/77, loss: 0.003330930834636092 2023-01-22 10:09:16.245282: step: 156/77, loss: 0.0006601756322197616 2023-01-22 10:09:17.662646: step: 160/77, loss: 0.024167301133275032 2023-01-22 10:09:19.047164: step: 164/77, loss: 0.032688651233911514 2023-01-22 10:09:20.455370: step: 168/77, loss: 0.012217102572321892 2023-01-22 10:09:21.897171: step: 172/77, loss: 0.031610578298568726 2023-01-22 10:09:23.227269: step: 176/77, loss: 0.012368066236376762 2023-01-22 10:09:24.682984: step: 180/77, loss: 0.002754107117652893 2023-01-22 10:09:26.103575: step: 184/77, loss: 0.007287105079740286 2023-01-22 10:09:27.591697: step: 188/77, loss: 0.0005752938450314105 2023-01-22 10:09:29.053244: step: 192/77, loss: 0.0035592676140367985 2023-01-22 10:09:30.399219: step: 196/77, loss: 0.0022799689322710037 2023-01-22 10:09:31.821782: step: 200/77, loss: 0.04851929470896721 2023-01-22 10:09:33.267127: step: 204/77, loss: 0.002934606047347188 2023-01-22 10:09:34.726728: 
step: 208/77, loss: 0.0063961283303797245 2023-01-22 10:09:36.159954: step: 212/77, loss: 0.00015372905181720853 2023-01-22 10:09:37.661505: step: 216/77, loss: 0.0050109392032027245 2023-01-22 10:09:39.157085: step: 220/77, loss: 0.0013842899352312088 2023-01-22 10:09:40.573762: step: 224/77, loss: 0.0016586286947131157 2023-01-22 10:09:41.992504: step: 228/77, loss: 0.04991191625595093 2023-01-22 10:09:43.483688: step: 232/77, loss: 0.028952427208423615 2023-01-22 10:09:44.897373: step: 236/77, loss: 0.0029057434294372797 2023-01-22 10:09:46.358552: step: 240/77, loss: 7.611663932038937e-06 2023-01-22 10:09:47.822657: step: 244/77, loss: 0.008669132366776466 2023-01-22 10:09:49.307952: step: 248/77, loss: 0.06115046888589859 2023-01-22 10:09:50.727456: step: 252/77, loss: 0.0016680802218616009 2023-01-22 10:09:52.123058: step: 256/77, loss: 0.013491089455783367 2023-01-22 10:09:53.568981: step: 260/77, loss: 0.010971372947096825 2023-01-22 10:09:55.065811: step: 264/77, loss: 0.019208434969186783 2023-01-22 10:09:56.466671: step: 268/77, loss: 0.0001701193832559511 2023-01-22 10:09:57.930438: step: 272/77, loss: 0.01278771460056305 2023-01-22 10:09:59.363611: step: 276/77, loss: 3.134423604933545e-05 2023-01-22 10:10:00.867887: step: 280/77, loss: 0.00923230778425932 2023-01-22 10:10:02.312905: step: 284/77, loss: 0.003195566590875387 2023-01-22 10:10:03.737308: step: 288/77, loss: 0.00010174508497584611 2023-01-22 10:10:05.126404: step: 292/77, loss: 0.036582957953214645 2023-01-22 10:10:06.607833: step: 296/77, loss: 0.00014206249034032226 2023-01-22 10:10:08.003427: step: 300/77, loss: 0.03389093652367592 2023-01-22 10:10:09.443135: step: 304/77, loss: 0.06591126322746277 2023-01-22 10:10:10.890935: step: 308/77, loss: 0.0071951295249164104 2023-01-22 10:10:12.409236: step: 312/77, loss: 0.01362125389277935 2023-01-22 10:10:13.788367: step: 316/77, loss: 0.044941496104002 2023-01-22 10:10:15.271051: step: 320/77, loss: 0.000544301641639322 2023-01-22 10:10:16.726285: step: 324/77, loss: 0.012037895619869232 2023-01-22 10:10:18.196844: step: 328/77, loss: 0.0004972864990122616 2023-01-22 10:10:19.656265: step: 332/77, loss: 0.02681150659918785 2023-01-22 10:10:21.075638: step: 336/77, loss: 0.059612877666950226 2023-01-22 10:10:22.484543: step: 340/77, loss: 0.03380858525633812 2023-01-22 10:10:23.908220: step: 344/77, loss: 0.00674303388223052 2023-01-22 10:10:25.434594: step: 348/77, loss: 0.00896853394806385 2023-01-22 10:10:26.817104: step: 352/77, loss: 0.04173743352293968 2023-01-22 10:10:28.340229: step: 356/77, loss: 0.03315752372145653 2023-01-22 10:10:29.793193: step: 360/77, loss: 0.0027552645187824965 2023-01-22 10:10:31.186977: step: 364/77, loss: 0.022793779149651527 2023-01-22 10:10:32.594230: step: 368/77, loss: 0.01640704646706581 2023-01-22 10:10:34.052436: step: 372/77, loss: 0.00779336504638195 2023-01-22 10:10:35.539539: step: 376/77, loss: 0.007757310755550861 2023-01-22 10:10:36.956228: step: 380/77, loss: 0.01880963332951069 2023-01-22 10:10:38.391157: step: 384/77, loss: 0.014945488423109055 2023-01-22 10:10:39.822352: step: 388/77, loss: 0.00010710747301345691 ================================================== Loss: 0.020 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Chinese: {'template': {'p': 0.9473684210526315, 'r': 0.5625, 'f1': 0.7058823529411765}, 'slot': {'p': 
0.5135135135135135, 'r': 0.018645731108930325, 'f1': 0.035984848484848495}, 'combined': 0.025401069518716585, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Korean: {'template': {'p': 0.9342105263157895, 'r': 0.5546875, 'f1': 0.6960784313725491}, 'slot': {'p': 0.4864864864864865, 'r': 0.017664376840039256, 'f1': 0.03409090909090909}, 'combined': 0.023729946524064172, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Russian: {'template': {'p': 0.935064935064935, 'r': 0.5625, 'f1': 0.702439024390244}, 'slot': {'p': 0.5135135135135135, 'r': 0.018645731108930325, 'f1': 0.035984848484848495}, 'combined': 0.025277161862527726, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 13} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 14 command: python 
train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 10:12:22.114610: step: 4/77, loss: 0.005267225205898285 2023-01-22 10:12:23.561304: step: 8/77, loss: 0.011591697111725807 2023-01-22 10:12:24.951249: step: 12/77, loss: 0.0173267163336277 2023-01-22 10:12:26.404133: step: 16/77, loss: 0.02167614735662937 2023-01-22 10:12:27.835347: step: 20/77, loss: 0.0008013755432330072 2023-01-22 10:12:29.244680: step: 24/77, loss: 5.200561281526461e-05 2023-01-22 10:12:30.667830: step: 28/77, loss: 0.0007757511339150369 2023-01-22 10:12:32.141580: step: 32/77, loss: 0.022520437836647034 2023-01-22 10:12:33.563884: step: 36/77, loss: 0.0003649297577794641 2023-01-22 10:12:35.056436: step: 40/77, loss: 0.009602165780961514 2023-01-22 10:12:36.575032: step: 44/77, loss: 0.0013668169267475605 2023-01-22 10:12:37.982307: step: 48/77, loss: 0.05336320400238037 2023-01-22 10:12:39.390800: step: 52/77, loss: 0.009318229742348194 2023-01-22 10:12:40.819614: step: 56/77, loss: 0.0022584404796361923 2023-01-22 10:12:42.243751: step: 60/77, loss: 0.0013622129335999489 2023-01-22 10:12:43.665612: step: 64/77, loss: 0.0027268631383776665 2023-01-22 10:12:45.131084: step: 68/77, loss: 0.0006924830377101898 2023-01-22 10:12:46.618491: step: 72/77, loss: 0.001636262284591794 2023-01-22 10:12:48.061098: step: 76/77, loss: 0.023755662143230438 2023-01-22 10:12:49.438245: step: 80/77, loss: 0.0003746433067135513 2023-01-22 10:12:50.896970: step: 84/77, loss: 0.0005386627744883299 2023-01-22 10:12:52.286382: step: 88/77, loss: 0.00035149790346622467 2023-01-22 10:12:53.767691: step: 92/77, loss: 0.004147316329181194 2023-01-22 10:12:55.156197: step: 96/77, loss: 0.0008540649432688951 2023-01-22 10:12:56.616095: step: 100/77, loss: 0.021562719717621803 2023-01-22 10:12:58.080950: step: 104/77, loss: 0.011822369880974293 2023-01-22 10:12:59.534757: step: 108/77, loss: 0.0012906633783131838 2023-01-22 10:13:00.963837: step: 112/77, loss: 0.05661414936184883 2023-01-22 10:13:02.425249: step: 116/77, loss: 0.0007475563324987888 2023-01-22 10:13:03.828200: step: 120/77, loss: 0.003470787312835455 2023-01-22 10:13:05.307552: step: 124/77, loss: 0.04624112322926521 2023-01-22 10:13:06.772915: step: 128/77, loss: 7.761608139844611e-05 2023-01-22 10:13:08.205073: step: 132/77, loss: 0.009200241416692734 2023-01-22 10:13:09.664342: step: 136/77, loss: 0.00542981643229723 2023-01-22 10:13:11.083290: step: 140/77, loss: 3.0431778213824145e-05 2023-01-22 10:13:12.582200: step: 144/77, loss: 0.006928062532097101 2023-01-22 10:13:13.992222: step: 148/77, loss: 0.09509888291358948 2023-01-22 10:13:15.402079: step: 152/77, loss: 0.0197527464479208 2023-01-22 10:13:16.822909: step: 156/77, loss: 0.0004752454406116158 2023-01-22 10:13:18.255492: step: 160/77, loss: 0.0003356131783220917 2023-01-22 10:13:19.662502: step: 164/77, loss: 0.0012678257189691067 2023-01-22 10:13:21.082463: step: 168/77, loss: 0.00036199152236804366 2023-01-22 10:13:22.503906: step: 172/77, loss: 0.03243018686771393 2023-01-22 10:13:23.899907: step: 176/77, loss: 0.01618010364472866 2023-01-22 10:13:25.354016: step: 180/77, loss: 0.007925470359623432 2023-01-22 10:13:26.760359: step: 184/77, loss: 0.001082070404663682 2023-01-22 10:13:28.162471: step: 188/77, loss: 0.002177658025175333 2023-01-22 10:13:29.588121: step: 192/77, loss: 2.7793914341600612e-05 2023-01-22 10:13:31.092886: step: 196/77, 
loss: 0.006863879971206188 2023-01-22 10:13:32.461781: step: 200/77, loss: 0.00818274449557066 2023-01-22 10:13:33.851753: step: 204/77, loss: 0.0021532122045755386 2023-01-22 10:13:35.306667: step: 208/77, loss: 0.003785195993259549 2023-01-22 10:13:36.672376: step: 212/77, loss: 0.0032439667265862226 2023-01-22 10:13:38.169901: step: 216/77, loss: 0.06093345209956169 2023-01-22 10:13:39.623532: step: 220/77, loss: 0.0012553473934531212 2023-01-22 10:13:41.030857: step: 224/77, loss: 0.0009988200617954135 2023-01-22 10:13:42.454236: step: 228/77, loss: 0.007733221631497145 2023-01-22 10:13:43.892693: step: 232/77, loss: 0.040917910635471344 2023-01-22 10:13:45.319697: step: 236/77, loss: 0.0003634836757555604 2023-01-22 10:13:46.732880: step: 240/77, loss: 0.0016613565385341644 2023-01-22 10:13:48.159532: step: 244/77, loss: 0.0347539484500885 2023-01-22 10:13:49.647585: step: 248/77, loss: 0.027186525985598564 2023-01-22 10:13:51.090004: step: 252/77, loss: 0.006373404059559107 2023-01-22 10:13:52.547390: step: 256/77, loss: 2.8301956263021566e-05 2023-01-22 10:13:53.968999: step: 260/77, loss: 0.0018408658215776086 2023-01-22 10:13:55.496049: step: 264/77, loss: 0.08936366438865662 2023-01-22 10:13:56.914931: step: 268/77, loss: 0.0006639264174737036 2023-01-22 10:13:58.365194: step: 272/77, loss: 0.0018201242201030254 2023-01-22 10:13:59.804487: step: 276/77, loss: 0.055738504976034164 2023-01-22 10:14:01.254295: step: 280/77, loss: 2.838547743522213e-06 2023-01-22 10:14:02.699246: step: 284/77, loss: 0.0014087861636653543 2023-01-22 10:14:04.145770: step: 288/77, loss: 5.1704242650885135e-05 2023-01-22 10:14:05.589629: step: 292/77, loss: 0.0033315045293420553 2023-01-22 10:14:07.064887: step: 296/77, loss: 0.01905788853764534 2023-01-22 10:14:08.490755: step: 300/77, loss: 0.005263179074972868 2023-01-22 10:14:09.932900: step: 304/77, loss: 0.0037886740174144506 2023-01-22 10:14:11.360931: step: 308/77, loss: 0.07296175509691238 2023-01-22 10:14:12.804561: step: 312/77, loss: 0.0009286962449550629 2023-01-22 10:14:14.262410: step: 316/77, loss: 0.0003937944129575044 2023-01-22 10:14:15.689361: step: 320/77, loss: 0.00011244205961702392 2023-01-22 10:14:17.138660: step: 324/77, loss: 0.02179723232984543 2023-01-22 10:14:18.636633: step: 328/77, loss: 0.04251597821712494 2023-01-22 10:14:20.102671: step: 332/77, loss: 0.00040819545392878354 2023-01-22 10:14:21.570470: step: 336/77, loss: 5.535347736440599e-05 2023-01-22 10:14:23.024781: step: 340/77, loss: 0.011182352900505066 2023-01-22 10:14:24.475309: step: 344/77, loss: 0.0029422559309750795 2023-01-22 10:14:25.901961: step: 348/77, loss: 0.003457922488451004 2023-01-22 10:14:27.343529: step: 352/77, loss: 0.00013586209388449788 2023-01-22 10:14:28.737701: step: 356/77, loss: 5.510602568392642e-05 2023-01-22 10:14:30.206126: step: 360/77, loss: 9.337370283901691e-05 2023-01-22 10:14:31.601762: step: 364/77, loss: 0.011819742619991302 2023-01-22 10:14:33.080449: step: 368/77, loss: 0.00814152229577303 2023-01-22 10:14:34.493343: step: 372/77, loss: 0.029679525643587112 2023-01-22 10:14:35.853830: step: 376/77, loss: 0.033626340329647064 2023-01-22 10:14:37.293794: step: 380/77, loss: 0.015861092135310173 2023-01-22 10:14:38.747857: step: 384/77, loss: 0.00013995288463775069 2023-01-22 10:14:40.140767: step: 388/77, loss: 0.0010508573614060879 ================================================== Loss: 0.012 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5166666666666667, 'f1': 0.6813186813186815}, 'slot': {'p': 0.5, 
'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.045661569471093295, 'epoch': 14} Test Chinese: {'template': {'p': 0.9154929577464789, 'r': 0.5078125, 'f1': 0.6532663316582914}, 'slot': {'p': 0.5483870967741935, 'r': 0.016683022571148183, 'f1': 0.03238095238095238}, 'combined': 0.021153385977506576, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5166666666666667, 'f1': 0.6813186813186815}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.045661569471093295, 'epoch': 14} Test Korean: {'template': {'p': 0.9130434782608695, 'r': 0.4921875, 'f1': 0.6395939086294415}, 'slot': {'p': 0.5172413793103449, 'r': 0.014720314033366046, 'f1': 0.028625954198473285}, 'combined': 0.0183089859340489, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5166666666666667, 'f1': 0.6813186813186815}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.045661569471093295, 'epoch': 14} Test Russian: {'template': {'p': 0.9142857142857143, 'r': 0.5, 'f1': 0.6464646464646465}, 'slot': {'p': 0.5172413793103449, 'r': 0.014720314033366046, 'f1': 0.028625954198473285}, 'combined': 0.018505667360629197, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 
0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 15 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 10:16:21.652280: step: 4/77, loss: 0.06576584279537201 2023-01-22 10:16:23.056292: step: 8/77, loss: 0.0211432334035635 2023-01-22 10:16:24.390904: step: 12/77, loss: 0.00011878870282089338 2023-01-22 10:16:25.742851: step: 16/77, loss: 0.0009247027919627726 2023-01-22 10:16:27.163359: step: 20/77, loss: 0.00018795518553815782 2023-01-22 10:16:28.548676: step: 24/77, loss: 0.002515072701498866 2023-01-22 10:16:29.972398: step: 28/77, loss: 0.0043100942857563496 2023-01-22 10:16:31.441987: step: 32/77, loss: 0.002241323236376047 2023-01-22 10:16:32.862924: step: 36/77, loss: 0.005226944573223591 2023-01-22 10:16:34.314544: step: 40/77, loss: 0.0135931596159935 2023-01-22 10:16:35.800005: step: 44/77, loss: 0.0008732576388865709 2023-01-22 10:16:37.203903: step: 48/77, loss: 0.0009864146122708917 2023-01-22 10:16:38.627426: step: 52/77, loss: 0.0003585018857847899 2023-01-22 10:16:40.081746: step: 56/77, loss: 2.714387301239185e-05 2023-01-22 10:16:41.500825: step: 60/77, loss: 0.0016551834996789694 2023-01-22 10:16:42.943425: step: 64/77, loss: 0.008181986398994923 2023-01-22 10:16:44.371143: step: 68/77, loss: 0.009752114303410053 2023-01-22 10:16:45.841797: step: 72/77, loss: 0.013987277634441853 2023-01-22 10:16:47.248981: step: 76/77, loss: 9.721517562866211e-05 2023-01-22 10:16:48.610356: step: 80/77, loss: 0.05479699745774269 2023-01-22 10:16:50.035187: step: 84/77, loss: 0.0022855522111058235 2023-01-22 10:16:51.427388: step: 88/77, loss: 0.009939520619809628 2023-01-22 10:16:52.759576: step: 92/77, loss: 4.864174479735084e-05 2023-01-22 10:16:54.171046: step: 96/77, loss: 0.0005828423891216516 2023-01-22 10:16:55.598488: step: 100/77, loss: 0.03715372458100319 2023-01-22 10:16:57.000944: step: 104/77, loss: 0.015810023993253708 2023-01-22 10:16:58.442845: step: 108/77, loss: 0.008435437455773354 2023-01-22 10:16:59.892563: step: 112/77, loss: 7.444271432177629e-06 2023-01-22 10:17:01.333131: step: 116/77, loss: 0.01651328057050705 2023-01-22 10:17:02.829428: step: 120/77, loss: 4.717539013654459e-06 2023-01-22 10:17:04.264189: step: 124/77, loss: 0.016156084835529327 2023-01-22 10:17:05.672001: step: 128/77, loss: 0.025796879082918167 2023-01-22 10:17:07.159115: step: 132/77, loss: 1.039034304994857e-05 2023-01-22 10:17:08.625525: step: 136/77, loss: 3.2849387935129926e-05 2023-01-22 10:17:10.025309: step: 140/77, loss: 0.009958821348845959 2023-01-22 10:17:11.473026: step: 144/77, loss: 0.0002767142141237855 2023-01-22 10:17:12.935395: step: 148/77, loss: 0.002541220746934414 2023-01-22 10:17:14.328613: step: 152/77, loss: 0.0062828054651618 2023-01-22 10:17:15.745423: step: 156/77, loss: 0.059553973376750946 2023-01-22 10:17:17.159975: step: 160/77, loss: 0.002758648945018649 2023-01-22 10:17:18.651227: step: 164/77, loss: 2.6639016141416505e-05 2023-01-22 10:17:20.151584: step: 168/77, loss: 0.003472244367003441 2023-01-22 10:17:21.624753: step: 172/77, loss: 2.9227057893876918e-05 2023-01-22 10:17:22.995261: step: 176/77, loss: 0.0006330236210487783 2023-01-22 10:17:24.452831: step: 180/77, loss: 0.006795175839215517 2023-01-22 10:17:25.892942: step: 184/77, loss: 0.0002546339819673449 2023-01-22 10:17:27.280614: 
step: 188/77, loss: 0.0009237767080776393 2023-01-22 10:17:28.719592: step: 192/77, loss: 0.022265290841460228 2023-01-22 10:17:30.147767: step: 196/77, loss: 0.00012686454283539206 2023-01-22 10:17:31.575110: step: 200/77, loss: 0.0052398075349628925 2023-01-22 10:17:33.021095: step: 204/77, loss: 0.023927073925733566 2023-01-22 10:17:34.423004: step: 208/77, loss: 0.0007230336777865887 2023-01-22 10:17:35.883231: step: 212/77, loss: 0.04853575676679611 2023-01-22 10:17:37.365328: step: 216/77, loss: 0.014252977445721626 2023-01-22 10:17:38.820612: step: 220/77, loss: 0.008642412722110748 2023-01-22 10:17:40.305306: step: 224/77, loss: 9.650964784668759e-05 2023-01-22 10:17:41.740043: step: 228/77, loss: 0.007102798670530319 2023-01-22 10:17:43.208725: step: 232/77, loss: 0.008989858441054821 2023-01-22 10:17:44.681881: step: 236/77, loss: 0.020074237138032913 2023-01-22 10:17:46.121154: step: 240/77, loss: 0.01964585669338703 2023-01-22 10:17:47.561869: step: 244/77, loss: 0.004278081934899092 2023-01-22 10:17:48.985250: step: 248/77, loss: 0.03222713619470596 2023-01-22 10:17:50.450922: step: 252/77, loss: 0.00010605229181237519 2023-01-22 10:17:51.932109: step: 256/77, loss: 0.07531239837408066 2023-01-22 10:17:53.371681: step: 260/77, loss: 0.012602516449987888 2023-01-22 10:17:54.871647: step: 264/77, loss: 0.008190508000552654 2023-01-22 10:17:56.341612: step: 268/77, loss: 0.06289426237344742 2023-01-22 10:17:57.760667: step: 272/77, loss: 0.0015910831280052662 2023-01-22 10:17:59.203238: step: 276/77, loss: 4.247297329129651e-05 2023-01-22 10:18:00.672812: step: 280/77, loss: 0.0022597406059503555 2023-01-22 10:18:02.100294: step: 284/77, loss: 0.0009675543988123536 2023-01-22 10:18:03.538391: step: 288/77, loss: 0.01921030879020691 2023-01-22 10:18:04.918822: step: 292/77, loss: 0.017905812710523605 2023-01-22 10:18:06.305259: step: 296/77, loss: 9.813245924306102e-06 2023-01-22 10:18:07.737544: step: 300/77, loss: 3.5145716537954286e-05 2023-01-22 10:18:09.184776: step: 304/77, loss: 0.023539673537015915 2023-01-22 10:18:10.568118: step: 308/77, loss: 0.024415859952569008 2023-01-22 10:18:12.067597: step: 312/77, loss: 0.0002481754054315388 2023-01-22 10:18:13.530922: step: 316/77, loss: 0.0004024395893793553 2023-01-22 10:18:14.970375: step: 320/77, loss: 0.0037798385601490736 2023-01-22 10:18:16.405625: step: 324/77, loss: 0.00010083715460496023 2023-01-22 10:18:17.861238: step: 328/77, loss: 0.007590695284307003 2023-01-22 10:18:19.286396: step: 332/77, loss: 0.0030735982581973076 2023-01-22 10:18:20.747818: step: 336/77, loss: 9.102160220209043e-06 2023-01-22 10:18:22.202224: step: 340/77, loss: 1.833957139751874e-05 2023-01-22 10:18:23.618268: step: 344/77, loss: 0.0004640549304895103 2023-01-22 10:18:25.060180: step: 348/77, loss: 0.00939967017620802 2023-01-22 10:18:26.500065: step: 352/77, loss: 0.01208038441836834 2023-01-22 10:18:27.957763: step: 356/77, loss: 0.022390833124518394 2023-01-22 10:18:29.388946: step: 360/77, loss: 0.08729444444179535 2023-01-22 10:18:30.790382: step: 364/77, loss: 0.006416450720280409 2023-01-22 10:18:32.288159: step: 368/77, loss: 0.033059343695640564 2023-01-22 10:18:33.813428: step: 372/77, loss: 0.009096910245716572 2023-01-22 10:18:35.251691: step: 376/77, loss: 0.0011584979947656393 2023-01-22 10:18:36.698568: step: 380/77, loss: 0.0014451435999944806 2023-01-22 10:18:38.160565: step: 384/77, loss: 1.3844704881194048e-05 2023-01-22 10:18:39.639366: step: 388/77, loss: 0.00019103710656054318 
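Each "Dev"/"Test" record in the evaluation block below reports template and slot precision (p), recall (r), and F1 per language, plus a 'combined' field. The logged numbers are consistent with F1 being the standard harmonic mean of p and r, and with 'combined' being the product of the template F1 and the slot F1. A minimal sketch of that arithmetic (assuming exactly those two formulas; this is not the project's scorer code) reproduces the epoch-15 Dev values:

def f1(p, r):
    # standard F1 = harmonic mean of precision and recall
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

template_f1 = f1(1.0, 0.55)                  # 0.7096774193548387 (epoch 15, Dev template)
slot_f1 = f1(0.5, 0.035916824196597356)      # 0.0670194003527337 (epoch 15, Dev slot)
print(template_f1 * slot_f1)                 # 0.04756215508903682, matching the logged 'combined'

Note also that the "Current best result" block keeps reporting epoch 2 even when a later epoch ties its Dev 'combined' score (epoch 13 also reaches 0.05179909351586346), which is consistent with the best checkpoint being replaced only on a strictly better Dev score.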
================================================== Loss: 0.011 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.04756215508903682, 'epoch': 15} Test Chinese: {'template': {'p': 0.9420289855072463, 'r': 0.5078125, 'f1': 0.6598984771573604}, 'slot': {'p': 0.6129032258064516, 'r': 0.018645731108930325, 'f1': 0.03619047619047619}, 'combined': 0.023882040125694948, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.04756215508903682, 'epoch': 15} Test Korean: {'template': {'p': 0.9420289855072463, 'r': 0.5078125, 'f1': 0.6598984771573604}, 'slot': {'p': 0.6129032258064516, 'r': 0.018645731108930325, 'f1': 0.03619047619047619}, 'combined': 0.023882040125694948, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.04756215508903682, 'epoch': 15} Test Russian: {'template': {'p': 0.9411764705882353, 'r': 0.5, 'f1': 0.6530612244897959}, 'slot': {'p': 0.6129032258064516, 'r': 0.018645731108930325, 'f1': 0.03619047619047619}, 'combined': 0.023634596695821183, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 
'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 16 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 10:20:21.491775: step: 4/77, loss: 0.002274035243317485 2023-01-22 10:20:22.903324: step: 8/77, loss: 0.0019370621303096414 2023-01-22 10:20:24.424250: step: 12/77, loss: 0.054813411086797714 2023-01-22 10:20:25.803805: step: 16/77, loss: 0.008220945484936237 2023-01-22 10:20:27.243645: step: 20/77, loss: 1.7062940969481133e-05 2023-01-22 10:20:28.702748: step: 24/77, loss: 0.0018931454978883266 2023-01-22 10:20:30.157579: step: 28/77, loss: 0.002858012681826949 2023-01-22 10:20:31.651665: step: 32/77, loss: 0.0064864917658269405 2023-01-22 10:20:33.095531: step: 36/77, loss: 0.0004651574417948723 2023-01-22 10:20:34.559143: step: 40/77, loss: 0.09202048927545547 2023-01-22 10:20:36.012160: step: 44/77, loss: 0.001105927163735032 2023-01-22 10:20:37.398198: step: 48/77, loss: 0.04170398414134979 2023-01-22 10:20:38.795045: step: 52/77, loss: 0.01814689300954342 2023-01-22 10:20:40.289269: step: 56/77, loss: 0.015868939459323883 2023-01-22 10:20:41.661984: step: 60/77, loss: 0.0033887759782373905 2023-01-22 10:20:43.112835: step: 64/77, loss: 0.020517315715551376 2023-01-22 10:20:44.559404: step: 68/77, loss: 0.0010318057611584663 2023-01-22 10:20:46.008740: step: 72/77, loss: 0.01650240831077099 2023-01-22 10:20:47.459544: step: 76/77, loss: 0.0040339017286896706 2023-01-22 10:20:48.832191: step: 80/77, loss: 0.053633980453014374 2023-01-22 10:20:50.321732: step: 84/77, loss: 0.024445664137601852 2023-01-22 10:20:51.840451: step: 88/77, loss: 0.022687377408146858 2023-01-22 10:20:53.344680: step: 92/77, loss: 0.00024189718533307314 2023-01-22 10:20:54.778268: step: 96/77, loss: 0.001149466261267662 2023-01-22 10:20:56.182603: step: 100/77, loss: 0.022049574181437492 2023-01-22 10:20:57.640725: step: 104/77, loss: 0.0008618884021416306 2023-01-22 10:20:59.074532: step: 108/77, loss: 0.0030203552450984716 2023-01-22 10:21:00.505242: step: 112/77, loss: 0.001800397178158164 2023-01-22 10:21:01.900842: step: 116/77, loss: 0.0006228326237760484 2023-01-22 10:21:03.362610: step: 120/77, loss: 0.032183367758989334 2023-01-22 10:21:04.781956: step: 124/77, loss: 0.04554427042603493 2023-01-22 10:21:06.220919: step: 128/77, loss: 0.00010357674182159826 2023-01-22 10:21:07.610042: step: 132/77, loss: 0.0013730874052271247 2023-01-22 10:21:09.035434: step: 136/77, loss: 0.029175298288464546 2023-01-22 10:21:10.505060: step: 140/77, loss: 0.0006298382068052888 2023-01-22 10:21:11.953357: step: 144/77, loss: 6.223077798495069e-05 2023-01-22 10:21:13.398810: step: 148/77, loss: 0.003994886297732592 2023-01-22 10:21:14.826634: step: 152/77, loss: 0.013091296888887882 2023-01-22 10:21:16.264171: step: 156/77, loss: 0.0001558217772981152 2023-01-22 10:21:17.713300: step: 160/77, loss: 0.0014729941030964255 2023-01-22 10:21:19.218267: step: 164/77, loss: 0.0010588800068944693 2023-01-22 10:21:20.671238: step: 168/77, loss: 0.00016571594460401684 2023-01-22 10:21:22.136154: step: 172/77, loss: 0.009606136940419674 2023-01-22 10:21:23.594402: step: 176/77, loss: 0.00643984554335475 2023-01-22 
10:21:25.025688: step: 180/77, loss: 0.0014251623069867492 2023-01-22 10:21:26.407914: step: 184/77, loss: 0.0024857092648744583 2023-01-22 10:21:27.818926: step: 188/77, loss: 0.08830219507217407 2023-01-22 10:21:29.210783: step: 192/77, loss: 0.0001782501203706488 2023-01-22 10:21:30.616368: step: 196/77, loss: 0.0014881398528814316 2023-01-22 10:21:32.075432: step: 200/77, loss: 7.254660886246711e-05 2023-01-22 10:21:33.562339: step: 204/77, loss: 0.0015049743233248591 2023-01-22 10:21:34.962297: step: 208/77, loss: 0.038156285881996155 2023-01-22 10:21:36.394110: step: 212/77, loss: 0.0020367216784507036 2023-01-22 10:21:37.804829: step: 216/77, loss: 0.003434864804148674 2023-01-22 10:21:39.178859: step: 220/77, loss: 0.0012388827744871378 2023-01-22 10:21:40.571946: step: 224/77, loss: 0.11639466136693954 2023-01-22 10:21:42.062401: step: 228/77, loss: 0.0008922365377657115 2023-01-22 10:21:43.432291: step: 232/77, loss: 0.045566074550151825 2023-01-22 10:21:44.881867: step: 236/77, loss: 0.0022351492661982775 2023-01-22 10:21:46.298845: step: 240/77, loss: 0.07056190073490143 2023-01-22 10:21:47.774921: step: 244/77, loss: 0.0027572649996727705 2023-01-22 10:21:49.205633: step: 248/77, loss: 0.00025906716473400593 2023-01-22 10:21:50.566274: step: 252/77, loss: 0.004387011285871267 2023-01-22 10:21:52.013930: step: 256/77, loss: 0.0001116612329497002 2023-01-22 10:21:53.393219: step: 260/77, loss: 0.005593471694737673 2023-01-22 10:21:54.791811: step: 264/77, loss: 0.03438470885157585 2023-01-22 10:21:56.241204: step: 268/77, loss: 0.0016064458759501576 2023-01-22 10:21:57.677068: step: 272/77, loss: 0.004222017712891102 2023-01-22 10:21:59.110665: step: 276/77, loss: 0.00023497124493587762 2023-01-22 10:22:00.558443: step: 280/77, loss: 0.007615208625793457 2023-01-22 10:22:02.027382: step: 284/77, loss: 0.030379055067896843 2023-01-22 10:22:03.498846: step: 288/77, loss: 7.958993955980986e-05 2023-01-22 10:22:04.944389: step: 292/77, loss: 7.129358436941402e-06 2023-01-22 10:22:06.359676: step: 296/77, loss: 0.007867962121963501 2023-01-22 10:22:07.754538: step: 300/77, loss: 0.003189869923517108 2023-01-22 10:22:09.147398: step: 304/77, loss: 0.025252973660826683 2023-01-22 10:22:10.649993: step: 308/77, loss: 0.0027913593221455812 2023-01-22 10:22:12.210787: step: 312/77, loss: 0.008064726367592812 2023-01-22 10:22:13.661062: step: 316/77, loss: 0.01227780431509018 2023-01-22 10:22:15.090496: step: 320/77, loss: 0.00796580035239458 2023-01-22 10:22:16.549640: step: 324/77, loss: 0.004041627515107393 2023-01-22 10:22:17.967545: step: 328/77, loss: 8.001786682143575e-07 2023-01-22 10:22:19.497297: step: 332/77, loss: 0.2514524459838867 2023-01-22 10:22:20.906621: step: 336/77, loss: 0.06413402408361435 2023-01-22 10:22:22.347334: step: 340/77, loss: 0.031314950436353683 2023-01-22 10:22:23.812567: step: 344/77, loss: 0.00578667875379324 2023-01-22 10:22:25.260073: step: 348/77, loss: 0.020943451672792435 2023-01-22 10:22:26.703957: step: 352/77, loss: 0.00128412083722651 2023-01-22 10:22:28.090935: step: 356/77, loss: 0.0031907805241644382 2023-01-22 10:22:29.534057: step: 360/77, loss: 0.009251225739717484 2023-01-22 10:22:30.980010: step: 364/77, loss: 0.00013018125900998712 2023-01-22 10:22:32.401560: step: 368/77, loss: 0.0030776180792599916 2023-01-22 10:22:33.853360: step: 372/77, loss: 0.0011256110155954957 2023-01-22 10:22:35.332328: step: 376/77, loss: 0.0024954641703516245 2023-01-22 10:22:36.752826: step: 380/77, loss: 0.0042509096674621105 2023-01-22 10:22:38.222755: 
step: 384/77, loss: 0.01576077565550804 2023-01-22 10:22:39.660148: step: 388/77, loss: 0.006246781442314386 ================================================== Loss: 0.016 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 16} Test Chinese: {'template': {'p': 0.8571428571428571, 'r': 0.5625, 'f1': 0.6792452830188678}, 'slot': {'p': 0.4878048780487805, 'r': 0.019627085377821395, 'f1': 0.03773584905660377}, 'combined': 0.025631897472410105, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 16} Test Korean: {'template': {'p': 0.8674698795180723, 'r': 0.5625, 'f1': 0.6824644549763033}, 'slot': {'p': 0.5128205128205128, 'r': 0.019627085377821395, 'f1': 0.03780718336483932}, 'combined': 0.025802058789274225, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 16} Test Russian: {'template': {'p': 0.8470588235294118, 'r': 0.5625, 'f1': 0.6760563380281689}, 'slot': {'p': 0.47619047619047616, 'r': 0.019627085377821395, 'f1': 0.037700282752120645}, 'combined': 0.02548751510002522, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 
0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 17 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 10:24:21.187498: step: 4/77, loss: 0.014770681969821453 2023-01-22 10:24:22.625682: step: 8/77, loss: 0.0876326635479927 2023-01-22 10:24:24.087368: step: 12/77, loss: 0.04138243570923805 2023-01-22 10:24:25.514783: step: 16/77, loss: 0.003260186407715082 2023-01-22 10:24:26.911978: step: 20/77, loss: 0.0027825208380818367 2023-01-22 10:24:28.283996: step: 24/77, loss: 0.00017264731286559254 2023-01-22 10:24:29.693246: step: 28/77, loss: 0.00026574067305773497 2023-01-22 10:24:31.157823: step: 32/77, loss: 0.0002445220889057964 2023-01-22 10:24:32.579996: step: 36/77, loss: 0.0008409050642512739 2023-01-22 10:24:34.023392: step: 40/77, loss: 0.0014837709022685885 2023-01-22 10:24:35.407489: step: 44/77, loss: 0.0001334149419562891 2023-01-22 10:24:36.902390: step: 48/77, loss: 0.0022824243642389774 2023-01-22 10:24:38.346600: step: 52/77, loss: 0.007556657772511244 2023-01-22 10:24:39.773589: step: 56/77, loss: 0.018492089584469795 2023-01-22 10:24:41.172430: step: 60/77, loss: 0.018770309165120125 2023-01-22 10:24:42.658219: step: 64/77, loss: 0.004591003060340881 2023-01-22 10:24:44.111448: step: 68/77, loss: 3.793321593548171e-05 2023-01-22 10:24:45.580387: step: 72/77, loss: 0.0016521599609404802 2023-01-22 10:24:46.955494: step: 76/77, loss: 8.729464752832428e-05 2023-01-22 10:24:48.413492: step: 80/77, loss: 2.5107715373451356e-06 2023-01-22 10:24:49.825917: step: 84/77, loss: 0.0007668976322747767 2023-01-22 10:24:51.251318: step: 88/77, loss: 0.00569231016561389 2023-01-22 10:24:52.721283: step: 92/77, loss: 0.04539667069911957 2023-01-22 10:24:54.179009: step: 96/77, loss: 0.004317179322242737 2023-01-22 10:24:55.623362: step: 100/77, loss: 0.00014809536514803767 2023-01-22 10:24:57.076655: step: 104/77, loss: 4.2761865188367665e-05 2023-01-22 10:24:58.505552: step: 108/77, loss: 0.0007335086120292544 2023-01-22 10:24:59.958955: step: 112/77, loss: 0.007040671072900295 2023-01-22 10:25:01.352972: step: 116/77, loss: 0.020112091675400734 2023-01-22 10:25:02.751500: step: 120/77, loss: 0.005425078794360161 2023-01-22 10:25:04.289681: step: 124/77, loss: 0.00016795247211121023 2023-01-22 10:25:05.744613: step: 128/77, loss: 0.019081249833106995 2023-01-22 10:25:07.244730: step: 132/77, loss: 0.019822169095277786 2023-01-22 10:25:08.680712: step: 136/77, loss: 0.0001944322866620496 2023-01-22 10:25:10.043993: step: 140/77, loss: 0.017132896929979324 2023-01-22 10:25:11.399695: step: 144/77, loss: 0.0003910954692400992 2023-01-22 10:25:12.793931: step: 148/77, loss: 0.0030151931568980217 2023-01-22 10:25:14.223279: step: 152/77, loss: 0.012330949306488037 2023-01-22 10:25:15.574949: step: 156/77, loss: 0.008543347008526325 2023-01-22 10:25:16.991591: step: 160/77, loss: 3.2266092603094876e-05 2023-01-22 10:25:18.370164: step: 164/77, loss: 0.01012550387531519 
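The step counter in the surrounding records advances by 4 per entry (4, 8, ..., 388), matching --accumulate_step 4; with gradient accumulation that corresponds to one logged loss per optimizer step, at roughly 1.4 s per step. A minimal sketch of such a loop, with stand-in model and data (illustrative only, not the project's train.py):

import torch

accumulate_step = 4
model = torch.nn.Linear(8, 1)                                   # stand-in model
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-4)
batches = [(torch.randn(10, 8), torch.randn(10, 1)) for _ in range(12)]  # batch_size 10

optimizer.zero_grad()
for i, (x, y) in enumerate(batches, start=1):
    # scale the loss so the accumulated gradient averages over the window
    loss = torch.nn.functional.mse_loss(model(x), y) / accumulate_step
    loss.backward()
    if i % accumulate_step == 0:
        optimizer.step()
        optimizer.zero_grad()
        print(f"step: {i}/{len(batches)}, loss: {loss.item() * accumulate_step}")

The per-epoch "Loss: 0.0xx" line printed after the "=====" separator is presumably the mean of these step losses over the epoch.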
2023-01-22 10:25:19.723048: step: 168/77, loss: 0.006201722659170628 2023-01-22 10:25:21.102063: step: 172/77, loss: 0.005624229088425636 2023-01-22 10:25:22.557573: step: 176/77, loss: 4.4780652387999e-05 2023-01-22 10:25:24.009573: step: 180/77, loss: 0.024179434403777122 2023-01-22 10:25:25.433573: step: 184/77, loss: 9.032005618792027e-05 2023-01-22 10:25:26.847515: step: 188/77, loss: 0.0244180615991354 2023-01-22 10:25:28.255999: step: 192/77, loss: 0.03398584946990013 2023-01-22 10:25:29.634235: step: 196/77, loss: 0.0010689867194741964 2023-01-22 10:25:31.081151: step: 200/77, loss: 0.0035388905089348555 2023-01-22 10:25:32.519992: step: 204/77, loss: 0.010380645282566547 2023-01-22 10:25:34.003258: step: 208/77, loss: 0.03376935422420502 2023-01-22 10:25:35.371304: step: 212/77, loss: 0.018724530935287476 2023-01-22 10:25:36.790109: step: 216/77, loss: 0.002145115751773119 2023-01-22 10:25:38.233202: step: 220/77, loss: 0.001531552872620523 2023-01-22 10:25:39.696496: step: 224/77, loss: 0.022273162379860878 2023-01-22 10:25:41.085289: step: 228/77, loss: 0.00013202633999753743 2023-01-22 10:25:42.563756: step: 232/77, loss: 0.010384333319962025 2023-01-22 10:25:43.956449: step: 236/77, loss: 0.005090624559670687 2023-01-22 10:25:45.418119: step: 240/77, loss: 0.0037023834884166718 2023-01-22 10:25:46.838146: step: 244/77, loss: 0.0026756480801850557 2023-01-22 10:25:48.276024: step: 248/77, loss: 0.0014291125116869807 2023-01-22 10:25:49.698194: step: 252/77, loss: 0.00014384086534846574 2023-01-22 10:25:51.107586: step: 256/77, loss: 5.036376478528837e-06 2023-01-22 10:25:52.554944: step: 260/77, loss: 0.00022549970890395343 2023-01-22 10:25:53.978898: step: 264/77, loss: 0.0037392026279121637 2023-01-22 10:25:55.412576: step: 268/77, loss: 0.06179117038846016 2023-01-22 10:25:56.914481: step: 272/77, loss: 0.0005593567620962858 2023-01-22 10:25:58.351256: step: 276/77, loss: 0.00114069867413491 2023-01-22 10:25:59.788094: step: 280/77, loss: 0.002634587697684765 2023-01-22 10:26:01.236177: step: 284/77, loss: 0.0018559263553470373 2023-01-22 10:26:02.658944: step: 288/77, loss: 0.031140733510255814 2023-01-22 10:26:04.102720: step: 292/77, loss: 0.006226778961718082 2023-01-22 10:26:05.593078: step: 296/77, loss: 0.0006024112808518112 2023-01-22 10:26:07.075422: step: 300/77, loss: 0.0003451938973739743 2023-01-22 10:26:08.526587: step: 304/77, loss: 0.0025052230339497328 2023-01-22 10:26:09.942992: step: 308/77, loss: 0.05138927698135376 2023-01-22 10:26:11.376762: step: 312/77, loss: 0.0004222650022711605 2023-01-22 10:26:12.764782: step: 316/77, loss: 0.010836608707904816 2023-01-22 10:26:14.186744: step: 320/77, loss: 2.141235654562479e-06 2023-01-22 10:26:15.670038: step: 324/77, loss: 0.0036584362387657166 2023-01-22 10:26:17.163354: step: 328/77, loss: 0.011637730523943901 2023-01-22 10:26:18.621048: step: 332/77, loss: 0.0017785464879125357 2023-01-22 10:26:20.077305: step: 336/77, loss: 0.0008704649517312646 2023-01-22 10:26:21.521584: step: 340/77, loss: 0.0016544598620384932 2023-01-22 10:26:22.983135: step: 344/77, loss: 0.0001686064206296578 2023-01-22 10:26:24.398300: step: 348/77, loss: 3.0189055451046443e-06 2023-01-22 10:26:25.853910: step: 352/77, loss: 1.2028551282128319e-05 2023-01-22 10:26:27.274471: step: 356/77, loss: 0.005813794210553169 2023-01-22 10:26:28.696600: step: 360/77, loss: 0.018742024898529053 2023-01-22 10:26:30.106633: step: 364/77, loss: 0.0039033587090671062 2023-01-22 10:26:31.555572: step: 368/77, loss: 7.665553857805207e-05 2023-01-22 
10:26:32.974601: step: 372/77, loss: 0.0037815242540091276 2023-01-22 10:26:34.430350: step: 376/77, loss: 0.0016702398424968123 2023-01-22 10:26:35.910231: step: 380/77, loss: 0.0018711560405790806 2023-01-22 10:26:37.408258: step: 384/77, loss: 0.004602421075105667 2023-01-22 10:26:38.957646: step: 388/77, loss: 0.005135645158588886 ================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Chinese: {'template': {'p': 0.9264705882352942, 'r': 0.4921875, 'f1': 0.6428571428571428}, 'slot': {'p': 0.6333333333333333, 'r': 0.018645731108930325, 'f1': 0.036224976167778845}, 'combined': 0.0232874846792864, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Korean: {'template': {'p': 0.9402985074626866, 'r': 0.4921875, 'f1': 0.6461538461538462}, 'slot': {'p': 0.6333333333333333, 'r': 0.018645731108930325, 'f1': 0.036224976167778845}, 'combined': 0.023406907677641718, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Russian: {'template': {'p': 0.9402985074626866, 'r': 0.4921875, 'f1': 0.6461538461538462}, 'slot': {'p': 0.6333333333333333, 'r': 0.018645731108930325, 'f1': 0.036224976167778845}, 'combined': 0.023406907677641718, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for 
Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 18 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 10:28:20.941178: step: 4/77, loss: 0.00629640556871891 2023-01-22 10:28:22.334127: step: 8/77, loss: 1.3032102287979797e-05 2023-01-22 10:28:23.844071: step: 12/77, loss: 0.00390604161657393 2023-01-22 10:28:25.248890: step: 16/77, loss: 0.01602979004383087 2023-01-22 10:28:26.687883: step: 20/77, loss: 5.475866146298358e-06 2023-01-22 10:28:28.106898: step: 24/77, loss: 0.0031633723992854357 2023-01-22 10:28:29.590070: step: 28/77, loss: 0.014412499964237213 2023-01-22 10:28:31.001065: step: 32/77, loss: 2.1889086383453105e-06 2023-01-22 10:28:32.472102: step: 36/77, loss: 3.456926788203418e-05 2023-01-22 10:28:33.903461: step: 40/77, loss: 2.2342781448969617e-05 2023-01-22 10:28:35.355680: step: 44/77, loss: 0.005496377125382423 2023-01-22 10:28:36.782787: step: 48/77, loss: 0.0002522608556319028 2023-01-22 10:28:38.242476: step: 52/77, loss: 0.00036922251456417143 2023-01-22 10:28:39.664544: step: 56/77, loss: 0.00252554495818913 2023-01-22 10:28:41.063603: step: 60/77, loss: 0.021473003551363945 2023-01-22 10:28:42.485421: step: 64/77, loss: 0.003913488704711199 2023-01-22 10:28:43.959599: step: 68/77, loss: 0.003509226720780134 2023-01-22 10:28:45.391917: step: 72/77, loss: 0.00018533196998760104 2023-01-22 10:28:46.821219: step: 76/77, loss: 0.001384116942062974 2023-01-22 10:28:48.261967: step: 80/77, loss: 0.10798466950654984 2023-01-22 10:28:49.713863: step: 84/77, loss: 4.4980937673244625e-05 2023-01-22 10:28:51.138237: step: 88/77, loss: 0.00011074270878452808 2023-01-22 10:28:52.617207: step: 92/77, loss: 2.721722012211103e-05 2023-01-22 10:28:54.015313: step: 96/77, loss: 2.3289867385756224e-06 2023-01-22 10:28:55.402526: step: 100/77, loss: 6.403658062481554e-06 2023-01-22 10:28:56.909761: step: 104/77, loss: 0.0002635190321598202 2023-01-22 10:28:58.362075: step: 108/77, loss: 8.024475391721353e-05 2023-01-22 10:28:59.845993: step: 112/77, loss: 0.011582602746784687 2023-01-22 10:29:01.253555: step: 116/77, loss: 0.004289014730602503 2023-01-22 10:29:02.740300: step: 120/77, loss: 0.038263604044914246 2023-01-22 10:29:04.211659: step: 124/77, loss: 0.004939154721796513 2023-01-22 10:29:05.592098: step: 128/77, loss: 0.0020356210879981518 2023-01-22 10:29:06.954977: step: 132/77, loss: 3.482759348116815e-05 2023-01-22 10:29:08.443828: step: 136/77, loss: 0.001752390991896391 2023-01-22 10:29:09.843595: step: 140/77, loss: 0.015921536833047867 2023-01-22 10:29:11.265963: step: 144/77, loss: 2.1755639068032906e-07 2023-01-22 10:29:12.718362: step: 148/77, loss: 6.622581167903263e-06 2023-01-22 10:29:14.194497: step: 152/77, loss: 0.0007548375870101154 2023-01-22 10:29:15.638549: step: 
156/77, loss: 0.025477493181824684 2023-01-22 10:29:17.050162: step: 160/77, loss: 0.00039732432924211025 2023-01-22 10:29:18.469096: step: 164/77, loss: 0.014110036194324493 2023-01-22 10:29:19.889733: step: 168/77, loss: 0.00010267073230352253 2023-01-22 10:29:21.272246: step: 172/77, loss: 0.0075200931169092655 2023-01-22 10:29:22.716654: step: 176/77, loss: 0.010753368958830833 2023-01-22 10:29:24.167172: step: 180/77, loss: 0.0007860129699110985 2023-01-22 10:29:25.632339: step: 184/77, loss: 0.021613411605358124 2023-01-22 10:29:27.112652: step: 188/77, loss: 0.004621770698577166 2023-01-22 10:29:28.505813: step: 192/77, loss: 2.2125350369606167e-05 2023-01-22 10:29:29.921981: step: 196/77, loss: 0.00014070799807086587 2023-01-22 10:29:31.414214: step: 200/77, loss: 0.022858303040266037 2023-01-22 10:29:32.870712: step: 204/77, loss: 9.427463737665676e-06 2023-01-22 10:29:34.278553: step: 208/77, loss: 0.0003459584841039032 2023-01-22 10:29:35.742440: step: 212/77, loss: 0.0033417996019124985 2023-01-22 10:29:37.170030: step: 216/77, loss: 0.01567632518708706 2023-01-22 10:29:38.580898: step: 220/77, loss: 0.014902174472808838 2023-01-22 10:29:39.960298: step: 224/77, loss: 0.00310932332649827 2023-01-22 10:29:41.416954: step: 228/77, loss: 0.056226640939712524 2023-01-22 10:29:42.893047: step: 232/77, loss: 4.0978045490192017e-07 2023-01-22 10:29:44.365046: step: 236/77, loss: 0.001314049819484353 2023-01-22 10:29:45.775092: step: 240/77, loss: 0.0017302314518019557 2023-01-22 10:29:47.270329: step: 244/77, loss: 0.007447042502462864 2023-01-22 10:29:48.738488: step: 248/77, loss: 0.06286231428384781 2023-01-22 10:29:50.195856: step: 252/77, loss: 0.00034409796353429556 2023-01-22 10:29:51.580552: step: 256/77, loss: 0.0007446880335919559 2023-01-22 10:29:52.977819: step: 260/77, loss: 0.02761266753077507 2023-01-22 10:29:54.426631: step: 264/77, loss: 0.032420236617326736 2023-01-22 10:29:55.858740: step: 268/77, loss: 0.0014822514494881034 2023-01-22 10:29:57.304733: step: 272/77, loss: 2.571425648056902e-05 2023-01-22 10:29:58.769684: step: 276/77, loss: 0.032803013920784 2023-01-22 10:30:00.181064: step: 280/77, loss: 0.0003158951294608414 2023-01-22 10:30:01.668485: step: 284/77, loss: 0.0009535959688946605 2023-01-22 10:30:03.193984: step: 288/77, loss: 0.007475333753973246 2023-01-22 10:30:04.600343: step: 292/77, loss: 0.002793649211525917 2023-01-22 10:30:06.034504: step: 296/77, loss: 0.002629114082083106 2023-01-22 10:30:07.428351: step: 300/77, loss: 0.0005797538906335831 2023-01-22 10:30:08.882614: step: 304/77, loss: 0.001783881220035255 2023-01-22 10:30:10.355254: step: 308/77, loss: 0.0007740409346297383 2023-01-22 10:30:11.839630: step: 312/77, loss: 0.06472983211278915 2023-01-22 10:30:13.246552: step: 316/77, loss: 0.00124368688557297 2023-01-22 10:30:14.712058: step: 320/77, loss: 0.0465131551027298 2023-01-22 10:30:16.163370: step: 324/77, loss: 0.07662312686443329 2023-01-22 10:30:17.597500: step: 328/77, loss: 0.0008284562500193715 2023-01-22 10:30:18.980209: step: 332/77, loss: 0.013321863487362862 2023-01-22 10:30:20.439864: step: 336/77, loss: 5.0740571168717e-05 2023-01-22 10:30:21.906571: step: 340/77, loss: 0.00013221157132647932 2023-01-22 10:30:23.266147: step: 344/77, loss: 0.002166406251490116 2023-01-22 10:30:24.760421: step: 348/77, loss: 0.006550523452460766 2023-01-22 10:30:26.220310: step: 352/77, loss: 0.022207271307706833 2023-01-22 10:30:27.710871: step: 356/77, loss: 7.443765935022384e-05 2023-01-22 10:30:29.160763: step: 360/77, loss: 
0.0145955141633749 2023-01-22 10:30:30.550098: step: 364/77, loss: 0.15446336567401886 2023-01-22 10:30:31.967024: step: 368/77, loss: 0.008689656853675842 2023-01-22 10:30:33.443074: step: 372/77, loss: 0.001763333915732801 2023-01-22 10:30:34.907912: step: 376/77, loss: 7.676680252188817e-05 2023-01-22 10:30:36.406954: step: 380/77, loss: 0.017358239740133286 2023-01-22 10:30:37.739134: step: 384/77, loss: 0.0013745925389230251 2023-01-22 10:30:39.180818: step: 388/77, loss: 0.00026828423142433167 ================================================== Loss: 0.011 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Chinese: {'template': {'p': 0.9583333333333334, 'r': 0.5390625, 'f1': 0.69}, 'slot': {'p': 0.5641025641025641, 'r': 0.021589793915603533, 'f1': 0.04158790170132325}, 'combined': 0.028695652173913042, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Korean: {'template': {'p': 0.958904109589041, 'r': 0.546875, 'f1': 0.6965174129353234}, 'slot': {'p': 0.5526315789473685, 'r': 0.020608439646712464, 'f1': 0.03973509933774835}, 'combined': 0.027676188593456566, 'epoch': 18} Dev Russian: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 18} Test Russian: {'template': {'p': 0.958904109589041, 'r': 0.546875, 'f1': 0.6965174129353234}, 'slot': {'p': 0.5641025641025641, 'r': 0.021589793915603533, 'f1': 0.04158790170132325}, 'combined': 0.02896669770241421, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 18} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 
0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 19 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 10:32:20.606109: step: 4/77, loss: 0.00186650559771806 2023-01-22 10:32:22.038067: step: 8/77, loss: 0.011341345496475697 2023-01-22 10:32:23.479604: step: 12/77, loss: 0.00042457960080355406 2023-01-22 10:32:24.922905: step: 16/77, loss: 0.004452820401638746 2023-01-22 10:32:26.324337: step: 20/77, loss: 9.29928501136601e-06 2023-01-22 10:32:27.776700: step: 24/77, loss: 1.871081258286722e-05 2023-01-22 10:32:29.263414: step: 28/77, loss: 2.7184534701518714e-05 2023-01-22 10:32:30.708049: step: 32/77, loss: 0.0002637415600474924 2023-01-22 10:32:32.160535: step: 36/77, loss: 0.005314426030963659 2023-01-22 10:32:33.607350: step: 40/77, loss: 0.003934026230126619 2023-01-22 10:32:35.018697: step: 44/77, loss: 0.01600668765604496 2023-01-22 10:32:36.426683: step: 48/77, loss: 0.007245240733027458 2023-01-22 10:32:37.868523: step: 52/77, loss: 0.013580265454947948 2023-01-22 10:32:39.291669: step: 56/77, loss: 0.0537562258541584 2023-01-22 10:32:40.726346: step: 60/77, loss: 0.005792696960270405 2023-01-22 10:32:42.227539: step: 64/77, loss: 0.000631436298135668 2023-01-22 10:32:43.702099: step: 68/77, loss: 0.001448130700737238 2023-01-22 10:32:45.112489: step: 72/77, loss: 0.0027798700612038374 2023-01-22 10:32:46.523100: step: 76/77, loss: 0.08459872752428055 2023-01-22 10:32:47.989086: step: 80/77, loss: 0.0358988381922245 2023-01-22 10:32:49.496403: step: 84/77, loss: 0.04680085927248001 2023-01-22 10:32:50.927482: step: 88/77, loss: 2.6717007131082937e-06 2023-01-22 10:32:52.398714: step: 92/77, loss: 0.0044856686145067215 2023-01-22 10:32:53.848382: step: 96/77, loss: 7.837633893359452e-05 2023-01-22 10:32:55.359310: step: 100/77, loss: 4.082479063072242e-05 2023-01-22 10:32:56.754913: step: 104/77, loss: 0.009041888639330864 2023-01-22 10:32:58.192272: step: 108/77, loss: 0.0013675595400854945 2023-01-22 10:32:59.555371: step: 112/77, loss: 2.3649601644137874e-05 2023-01-22 10:33:00.989703: step: 116/77, loss: 0.0014585917815566063 2023-01-22 10:33:02.436770: step: 120/77, loss: 0.10662340372800827 2023-01-22 10:33:03.894167: step: 124/77, loss: 0.0008903276757337153 2023-01-22 10:33:05.388211: step: 128/77, loss: 0.012171868234872818 2023-01-22 10:33:06.800026: step: 132/77, loss: 1.0170209861826152e-05 2023-01-22 10:33:08.242875: step: 136/77, loss: 0.002750490326434374 2023-01-22 10:33:09.747304: step: 140/77, loss: 0.0001082413800759241 2023-01-22 10:33:11.180683: step: 
144/77, loss: 0.017326457425951958 2023-01-22 10:33:12.584630: step: 148/77, loss: 0.00018802982231136411 2023-01-22 10:33:14.069429: step: 152/77, loss: 0.0004940549260936677 2023-01-22 10:33:15.537342: step: 156/77, loss: 0.021055961027741432 2023-01-22 10:33:16.935525: step: 160/77, loss: 0.007170780561864376 2023-01-22 10:33:18.344871: step: 164/77, loss: 0.04258148372173309 2023-01-22 10:33:19.773894: step: 168/77, loss: 4.662974242819473e-05 2023-01-22 10:33:21.220760: step: 172/77, loss: 0.006000472232699394 2023-01-22 10:33:22.669081: step: 176/77, loss: 0.022700928151607513 2023-01-22 10:33:24.062593: step: 180/77, loss: 0.041062355041503906 2023-01-22 10:33:25.540822: step: 184/77, loss: 0.00031986297108232975 2023-01-22 10:33:26.960571: step: 188/77, loss: 0.011816378682851791 2023-01-22 10:33:28.409134: step: 192/77, loss: 0.0010583401890471578 2023-01-22 10:33:29.866079: step: 196/77, loss: 0.004462048877030611 2023-01-22 10:33:31.266240: step: 200/77, loss: 0.045728087425231934 2023-01-22 10:33:32.698552: step: 204/77, loss: 0.0007927333936095238 2023-01-22 10:33:34.138368: step: 208/77, loss: 0.0013061045901849866 2023-01-22 10:33:35.542286: step: 212/77, loss: 0.002018637489527464 2023-01-22 10:33:36.983471: step: 216/77, loss: 6.201502401381731e-05 2023-01-22 10:33:38.398797: step: 220/77, loss: 0.0016469608526676893 2023-01-22 10:33:39.859930: step: 224/77, loss: 0.000588214083109051 2023-01-22 10:33:41.330310: step: 228/77, loss: 0.01477520540356636 2023-01-22 10:33:42.710005: step: 232/77, loss: 0.012828252278268337 2023-01-22 10:33:44.096229: step: 236/77, loss: 0.0024332371540367603 2023-01-22 10:33:45.508701: step: 240/77, loss: 0.00028775102691724896 2023-01-22 10:33:46.946170: step: 244/77, loss: 0.0565745048224926 2023-01-22 10:33:48.360468: step: 248/77, loss: 0.001304167089983821 2023-01-22 10:33:49.760009: step: 252/77, loss: 0.001137460581958294 2023-01-22 10:33:51.290147: step: 256/77, loss: 0.07675373554229736 2023-01-22 10:33:52.641783: step: 260/77, loss: 2.674641336852801e-06 2023-01-22 10:33:54.049530: step: 264/77, loss: 0.0002038546372205019 2023-01-22 10:33:55.498319: step: 268/77, loss: 0.0010206920560449362 2023-01-22 10:33:56.909137: step: 272/77, loss: 0.017133938148617744 2023-01-22 10:33:58.312370: step: 276/77, loss: 0.016053365543484688 2023-01-22 10:33:59.700841: step: 280/77, loss: 0.0016087039839476347 2023-01-22 10:34:01.041622: step: 284/77, loss: 0.01253533735871315 2023-01-22 10:34:02.542430: step: 288/77, loss: 0.0019009861862286925 2023-01-22 10:34:03.935018: step: 292/77, loss: 0.00038376866723410785 2023-01-22 10:34:05.391180: step: 296/77, loss: 0.002783339936286211 2023-01-22 10:34:06.882490: step: 300/77, loss: 0.0008510855259373784 2023-01-22 10:34:08.298137: step: 304/77, loss: 0.005121702328324318 2023-01-22 10:34:09.730456: step: 308/77, loss: 0.0007576277712360024 2023-01-22 10:34:11.167147: step: 312/77, loss: 9.761757974047214e-05 2023-01-22 10:34:12.615925: step: 316/77, loss: 0.08049427717924118 2023-01-22 10:34:13.995299: step: 320/77, loss: 5.37521846126765e-05 2023-01-22 10:34:15.429917: step: 324/77, loss: 0.00024958080030046403 2023-01-22 10:34:16.906490: step: 328/77, loss: 3.047224936381099e-06 2023-01-22 10:34:18.317801: step: 332/77, loss: 0.00022927092504687607 2023-01-22 10:34:19.778190: step: 336/77, loss: 3.923237818526104e-05 2023-01-22 10:34:21.234991: step: 340/77, loss: 4.395913128973916e-05 2023-01-22 10:34:22.645211: step: 344/77, loss: 1.4373413250723388e-05 2023-01-22 10:34:24.131628: step: 348/77, 
loss: 0.004309260752052069 2023-01-22 10:34:25.527686: step: 352/77, loss: 0.0005051622283644974 2023-01-22 10:34:26.913812: step: 356/77, loss: 0.003383730771020055 2023-01-22 10:34:28.318321: step: 360/77, loss: 0.016977690160274506 2023-01-22 10:34:29.717200: step: 364/77, loss: 0.010579852387309074 2023-01-22 10:34:31.127981: step: 368/77, loss: 0.0010027396492660046 2023-01-22 10:34:32.533571: step: 372/77, loss: 0.00314369797706604 2023-01-22 10:34:33.990756: step: 376/77, loss: 0.028379876166582108 2023-01-22 10:34:35.478007: step: 380/77, loss: 8.340930435224436e-06 2023-01-22 10:34:36.876588: step: 384/77, loss: 0.012038210406899452 2023-01-22 10:34:38.298332: step: 388/77, loss: 0.04218676686286926 ================================================== Loss: 0.011 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Chinese: {'template': {'p': 0.9230769230769231, 'r': 0.5625, 'f1': 0.6990291262135923}, 'slot': {'p': 0.5789473684210527, 'r': 0.021589793915603533, 'f1': 0.04162724692526017}, 'combined': 0.02909865804484206, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Korean: {'template': {'p': 0.9240506329113924, 'r': 0.5703125, 'f1': 0.7053140096618359}, 'slot': {'p': 0.5675675675675675, 'r': 0.020608439646712464, 'f1': 0.03977272727272728}, 'combined': 0.02805226174791393, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Russian: {'template': {'p': 0.9240506329113924, 'r': 0.5703125, 'f1': 0.7053140096618359}, 'slot': {'p': 0.5675675675675675, 'r': 0.020608439646712464, 'f1': 0.03977272727272728}, 'combined': 0.02805226174791393, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 19} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 
'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 20 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 10:36:19.723349: step: 4/77, loss: 1.0884960829571355e-05 2023-01-22 10:36:21.131886: step: 8/77, loss: 0.007430312689393759 2023-01-22 10:36:22.561556: step: 12/77, loss: 0.009909817017614841 2023-01-22 10:36:23.965195: step: 16/77, loss: 0.0011846608249470592 2023-01-22 10:36:25.400444: step: 20/77, loss: 1.698593951005023e-05 2023-01-22 10:36:26.852106: step: 24/77, loss: 0.0011173434322699904 2023-01-22 10:36:28.231275: step: 28/77, loss: 0.006730676628649235 2023-01-22 10:36:29.704371: step: 32/77, loss: 1.2242147931829095e-05 2023-01-22 10:36:31.095821: step: 36/77, loss: 0.005255047231912613 2023-01-22 10:36:32.522444: step: 40/77, loss: 1.379373497911729e-05 2023-01-22 10:36:33.986960: step: 44/77, loss: 0.004701434168964624 2023-01-22 10:36:35.407992: step: 48/77, loss: 0.0014443210093304515 2023-01-22 10:36:36.887587: step: 52/77, loss: 0.0008496107766404748 2023-01-22 10:36:38.292184: step: 56/77, loss: 6.5918388827412855e-06 2023-01-22 10:36:39.811228: step: 60/77, loss: 0.019632356241345406 2023-01-22 10:36:41.210283: step: 64/77, loss: 0.0003296667418908328 2023-01-22 10:36:42.624901: step: 68/77, loss: 0.0006062939064577222 2023-01-22 10:36:44.080404: step: 72/77, loss: 0.0002514682710170746 2023-01-22 10:36:45.537171: step: 76/77, loss: 0.023351075127720833 2023-01-22 10:36:46.906756: step: 80/77, loss: 5.627698101307033e-06 2023-01-22 10:36:48.312317: step: 84/77, loss: 2.480560215190053e-05 2023-01-22 10:36:49.765168: step: 88/77, loss: 4.735262336907908e-06 2023-01-22 10:36:51.203474: step: 92/77, loss: 6.775063957320526e-05 2023-01-22 10:36:52.669187: step: 96/77, loss: 0.0003638921189121902 2023-01-22 10:36:54.158135: step: 100/77, loss: 0.02972320280969143 2023-01-22 10:36:55.557807: step: 104/77, loss: 0.0001608357997611165 2023-01-22 10:36:57.064530: step: 108/77, loss: 0.0215868279337883 2023-01-22 10:36:58.547037: step: 112/77, loss: 0.05368008837103844 2023-01-22 10:36:59.988075: step: 116/77, loss: 0.005768325179815292 2023-01-22 10:37:01.423583: step: 120/77, loss: 0.005247610621154308 2023-01-22 10:37:02.843232: step: 124/77, loss: 0.002271172357723117 2023-01-22 10:37:04.350712: step: 128/77, loss: 9.185097587760538e-05 2023-01-22 10:37:05.736479: 
step: 132/77, loss: 0.0012038334971293807 2023-01-22 10:37:07.087071: step: 136/77, loss: 0.0008183949394151568 2023-01-22 10:37:08.561593: step: 140/77, loss: 0.03969525545835495 2023-01-22 10:37:09.996570: step: 144/77, loss: 0.0012220474891364574 2023-01-22 10:37:11.407812: step: 148/77, loss: 0.0033739679493010044 2023-01-22 10:37:12.828490: step: 152/77, loss: 1.5616130895068636e-06 2023-01-22 10:37:14.311920: step: 156/77, loss: 0.060396626591682434 2023-01-22 10:37:15.815950: step: 160/77, loss: 0.009896337985992432 2023-01-22 10:37:17.291064: step: 164/77, loss: 0.000577462837100029 2023-01-22 10:37:18.739763: step: 168/77, loss: 0.031885746866464615 2023-01-22 10:37:20.207564: step: 172/77, loss: 4.327083661337383e-05 2023-01-22 10:37:21.683610: step: 176/77, loss: 0.010665344074368477 2023-01-22 10:37:23.096755: step: 180/77, loss: 3.1690917239757255e-05 2023-01-22 10:37:24.538886: step: 184/77, loss: 0.016238974407315254 2023-01-22 10:37:25.948006: step: 188/77, loss: 3.9243343053385615e-05 2023-01-22 10:37:27.417773: step: 192/77, loss: 7.24821729818359e-05 2023-01-22 10:37:28.874775: step: 196/77, loss: 2.0607499209290836e-06 2023-01-22 10:37:30.337758: step: 200/77, loss: 0.0031924722716212273 2023-01-22 10:37:31.710459: step: 204/77, loss: 8.680317841935903e-05 2023-01-22 10:37:33.105631: step: 208/77, loss: 0.004762844182550907 2023-01-22 10:37:34.527835: step: 212/77, loss: 0.004229036625474691 2023-01-22 10:37:35.933983: step: 216/77, loss: 0.0013016742886975408 2023-01-22 10:37:37.395834: step: 220/77, loss: 0.0001282370212720707 2023-01-22 10:37:38.822936: step: 224/77, loss: 3.843209924525581e-05 2023-01-22 10:37:40.191855: step: 228/77, loss: 0.02050735242664814 2023-01-22 10:37:41.622781: step: 232/77, loss: 0.0035930124577134848 2023-01-22 10:37:43.035874: step: 236/77, loss: 0.00012073626567143947 2023-01-22 10:37:44.443012: step: 240/77, loss: 0.040597103536129 2023-01-22 10:37:45.862153: step: 244/77, loss: 9.38911980483681e-05 2023-01-22 10:37:47.331307: step: 248/77, loss: 1.2011138096568175e-05 2023-01-22 10:37:48.817443: step: 252/77, loss: 0.03938305377960205 2023-01-22 10:37:50.307538: step: 256/77, loss: 0.00027365548885427415 2023-01-22 10:37:51.751905: step: 260/77, loss: 0.0023456427734345198 2023-01-22 10:37:53.218167: step: 264/77, loss: 0.007549828849732876 2023-01-22 10:37:54.656731: step: 268/77, loss: 0.0025673359632492065 2023-01-22 10:37:56.022196: step: 272/77, loss: 0.0044209775514900684 2023-01-22 10:37:57.401718: step: 276/77, loss: 1.4037575965630822e-05 2023-01-22 10:37:58.834363: step: 280/77, loss: 0.0001280048891203478 2023-01-22 10:38:00.371736: step: 284/77, loss: 0.03903944045305252 2023-01-22 10:38:01.824256: step: 288/77, loss: 1.4573142834706232e-06 2023-01-22 10:38:03.314302: step: 292/77, loss: 0.09980116039514542 2023-01-22 10:38:04.817211: step: 296/77, loss: 0.00011804010136984289 2023-01-22 10:38:06.252991: step: 300/77, loss: 0.011888409033417702 2023-01-22 10:38:07.675750: step: 304/77, loss: 0.0007111412705853581 2023-01-22 10:38:09.063365: step: 308/77, loss: 4.1683739254949614e-05 2023-01-22 10:38:10.501180: step: 312/77, loss: 0.012236321344971657 2023-01-22 10:38:12.014992: step: 316/77, loss: 0.0014427776914089918 2023-01-22 10:38:13.453594: step: 320/77, loss: 6.675928307231516e-05 2023-01-22 10:38:14.896922: step: 324/77, loss: 0.004596993327140808 2023-01-22 10:38:16.327594: step: 328/77, loss: 0.014233395457267761 2023-01-22 10:38:17.754426: step: 332/77, loss: 0.06907449662685394 2023-01-22 10:38:19.202714: 
step: 336/77, loss: 5.8134014579991344e-06 2023-01-22 10:38:20.667157: step: 340/77, loss: 3.035301688214531e-06 2023-01-22 10:38:22.099172: step: 344/77, loss: 0.000876867794431746 2023-01-22 10:38:23.545403: step: 348/77, loss: 0.0074017345905303955 2023-01-22 10:38:24.958516: step: 352/77, loss: 0.0007744339527562261 2023-01-22 10:38:26.406914: step: 356/77, loss: 0.008747021667659283 2023-01-22 10:38:27.819508: step: 360/77, loss: 0.017727002501487732 2023-01-22 10:38:29.219895: step: 364/77, loss: 0.00010129573638550937 2023-01-22 10:38:30.646297: step: 368/77, loss: 0.01120673306286335 2023-01-22 10:38:32.062394: step: 372/77, loss: 0.004367190413177013 2023-01-22 10:38:33.461905: step: 376/77, loss: 6.192243745317683e-05 2023-01-22 10:38:34.838331: step: 380/77, loss: 0.0005244517815299332 2023-01-22 10:38:36.256159: step: 384/77, loss: 0.0010551504092290998 2023-01-22 10:38:37.654369: step: 388/77, loss: 2.8043286874890327e-06 ================================================== Loss: 0.008 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.050765210899589394, 'epoch': 20} Test Chinese: {'template': {'p': 0.9583333333333334, 'r': 0.5390625, 'f1': 0.69}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.02628571428571428, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 20} Test Korean: {'template': {'p': 0.958904109589041, 'r': 0.546875, 'f1': 0.6965174129353234}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.026533996683250415, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.050765210899589394, 'epoch': 20} Test Russian: {'template': {'p': 0.9583333333333334, 'r': 0.5390625, 'f1': 0.69}, 'slot': {'p': 0.6666666666666666, 'r': 0.019627085377821395, 'f1': 0.03813155386081983}, 'combined': 0.02631077216396568, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} 
-------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 21 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 10:40:19.780333: step: 4/77, loss: 0.005530391354113817 2023-01-22 10:40:21.157078: step: 8/77, loss: 0.0003530173853505403 2023-01-22 10:40:22.591433: step: 12/77, loss: 0.0536833330988884 2023-01-22 10:40:24.071934: step: 16/77, loss: 0.005902047269046307 2023-01-22 10:40:25.505843: step: 20/77, loss: 4.738548682325927e-07 2023-01-22 10:40:26.981215: step: 24/77, loss: 0.0003305294376332313 2023-01-22 10:40:28.394480: step: 28/77, loss: 0.00017434243636671454 2023-01-22 10:40:29.834553: step: 32/77, loss: 0.00037804216844961047 2023-01-22 10:40:31.271605: step: 36/77, loss: 0.003190763061866164 2023-01-22 10:40:32.731672: step: 40/77, loss: 0.0003863647871185094 2023-01-22 10:40:34.178146: step: 44/77, loss: 2.1436835595523007e-05 2023-01-22 10:40:35.631349: step: 48/77, loss: 0.0010237182723358274 2023-01-22 10:40:37.084696: step: 52/77, loss: 0.00014183111488819122 2023-01-22 10:40:38.537833: step: 56/77, loss: 0.0003928892838303 2023-01-22 10:40:39.952331: step: 60/77, loss: 0.01873972825706005 2023-01-22 10:40:41.433636: step: 64/77, loss: 0.00020161019347142428 2023-01-22 10:40:42.912131: step: 68/77, loss: 0.006954270415008068 2023-01-22 10:40:44.430670: step: 72/77, loss: 0.0023213254753500223 2023-01-22 10:40:45.875348: step: 76/77, loss: 0.013047211803495884 2023-01-22 10:40:47.305571: step: 80/77, loss: 0.006372448056936264 2023-01-22 10:40:48.743456: step: 84/77, loss: 1.8654458472155966e-05 2023-01-22 10:40:50.135247: step: 88/77, loss: 0.019734129309654236 2023-01-22 10:40:51.674741: step: 92/77, loss: 0.0007313595851883292 2023-01-22 10:40:53.076613: step: 96/77, loss: 2.9799655749229714e-05 2023-01-22 10:40:54.428677: step: 100/77, loss: 0.0002187217032769695 2023-01-22 10:40:55.881808: step: 104/77, loss: 1.2623586371773854e-05 2023-01-22 10:40:57.345845: step: 108/77, loss: 0.011636082082986832 2023-01-22 10:40:58.775725: step: 112/77, loss: 0.012348459102213383 2023-01-22 10:41:00.285412: step: 116/77, loss: 0.01756596937775612 2023-01-22 10:41:01.762585: step: 120/77, loss: 
5.153836809768109e-06 2023-01-22 10:41:03.129900: step: 124/77, loss: 0.0007325861952267587 2023-01-22 10:41:04.608806: step: 128/77, loss: 0.00038951492751948535 2023-01-22 10:41:06.034078: step: 132/77, loss: 0.0022269547916948795 2023-01-22 10:41:07.517143: step: 136/77, loss: 1.160401643574005e-05 2023-01-22 10:41:08.959072: step: 140/77, loss: 0.04449259117245674 2023-01-22 10:41:10.350434: step: 144/77, loss: 1.0371063581260387e-06 2023-01-22 10:41:11.783086: step: 148/77, loss: 0.000331414194079116 2023-01-22 10:41:13.208385: step: 152/77, loss: 6.859993300167844e-05 2023-01-22 10:41:14.646364: step: 156/77, loss: 2.431750772302621e-06 2023-01-22 10:41:16.111982: step: 160/77, loss: 0.006880715489387512 2023-01-22 10:41:17.533735: step: 164/77, loss: 0.0014426393900066614 2023-01-22 10:41:18.987776: step: 168/77, loss: 0.00031652135658077896 2023-01-22 10:41:20.460692: step: 172/77, loss: 1.2606246855284553e-06 2023-01-22 10:41:21.818898: step: 176/77, loss: 2.0184765162412077e-05 2023-01-22 10:41:23.324029: step: 180/77, loss: 0.00422252481803298 2023-01-22 10:41:24.812181: step: 184/77, loss: 5.170652457309188e-07 2023-01-22 10:41:26.298491: step: 188/77, loss: 0.03473076969385147 2023-01-22 10:41:27.761662: step: 192/77, loss: 0.0005205783527344465 2023-01-22 10:41:29.194192: step: 196/77, loss: 0.0054445634596049786 2023-01-22 10:41:30.596249: step: 200/77, loss: 2.1511028535314836e-05 2023-01-22 10:41:31.985000: step: 204/77, loss: 4.1872135625453666e-07 2023-01-22 10:41:33.351269: step: 208/77, loss: 8.406052984355483e-06 2023-01-22 10:41:34.792676: step: 212/77, loss: 0.00669349217787385 2023-01-22 10:41:36.271766: step: 216/77, loss: 0.00034069089451804757 2023-01-22 10:41:37.705441: step: 220/77, loss: 0.011898697353899479 2023-01-22 10:41:39.127690: step: 224/77, loss: 0.000776562956161797 2023-01-22 10:41:40.598385: step: 228/77, loss: 0.001380259171128273 2023-01-22 10:41:42.052642: step: 232/77, loss: 1.1802635526692029e-05 2023-01-22 10:41:43.484615: step: 236/77, loss: 2.6416120817884803e-05 2023-01-22 10:41:45.011380: step: 240/77, loss: 0.003274685936048627 2023-01-22 10:41:46.439196: step: 244/77, loss: 0.002510338556021452 2023-01-22 10:41:47.870292: step: 248/77, loss: 3.347124220454134e-05 2023-01-22 10:41:49.344785: step: 252/77, loss: 0.08811137080192566 2023-01-22 10:41:50.790953: step: 256/77, loss: 1.0266564913763432e-06 2023-01-22 10:41:52.142599: step: 260/77, loss: 0.012822951190173626 2023-01-22 10:41:53.571629: step: 264/77, loss: 1.4880620256008115e-05 2023-01-22 10:41:55.004308: step: 268/77, loss: 5.9488898841664195e-05 2023-01-22 10:41:56.506363: step: 272/77, loss: 0.0005867235595360398 2023-01-22 10:41:57.955464: step: 276/77, loss: 0.010660209693014622 2023-01-22 10:41:59.414734: step: 280/77, loss: 0.01802891679108143 2023-01-22 10:42:00.858617: step: 284/77, loss: 0.0002461299882270396 2023-01-22 10:42:02.298253: step: 288/77, loss: 0.00024737458443269134 2023-01-22 10:42:03.795937: step: 292/77, loss: 0.0002895930374506861 2023-01-22 10:42:05.174383: step: 296/77, loss: 7.560687663499266e-05 2023-01-22 10:42:06.617779: step: 300/77, loss: 1.0199121788900811e-05 2023-01-22 10:42:08.047459: step: 304/77, loss: 0.008103596977889538 2023-01-22 10:42:09.431977: step: 308/77, loss: 0.00024358216614928097 2023-01-22 10:42:10.932250: step: 312/77, loss: 3.187201218679547e-05 2023-01-22 10:42:12.390190: step: 316/77, loss: 2.9708575311815366e-05 2023-01-22 10:42:13.775349: step: 320/77, loss: 0.0017332624411210418 2023-01-22 10:42:15.269065: step: 
324/77, loss: 0.0007244001026265323 2023-01-22 10:42:16.701593: step: 328/77, loss: 0.005347931291908026 2023-01-22 10:42:18.146948: step: 332/77, loss: 0.0006719163502566516 2023-01-22 10:42:19.644670: step: 336/77, loss: 0.03064035065472126 2023-01-22 10:42:21.099926: step: 340/77, loss: 0.0007121642120182514 2023-01-22 10:42:22.523365: step: 344/77, loss: 8.618739229859784e-06 2023-01-22 10:42:23.950226: step: 348/77, loss: 0.000486185890622437 2023-01-22 10:42:25.385955: step: 352/77, loss: 2.7378995582694188e-05 2023-01-22 10:42:26.843485: step: 356/77, loss: 3.448443385423161e-05 2023-01-22 10:42:28.278128: step: 360/77, loss: 2.4184762878576294e-05 2023-01-22 10:42:29.740298: step: 364/77, loss: 0.0701582059264183 2023-01-22 10:42:31.140486: step: 368/77, loss: 9.116478031501174e-05 2023-01-22 10:42:32.637031: step: 372/77, loss: 0.0022903112694621086 2023-01-22 10:42:34.075532: step: 376/77, loss: 1.1425704542489257e-05 2023-01-22 10:42:35.533396: step: 380/77, loss: 0.0025022730696946383 2023-01-22 10:42:37.001212: step: 384/77, loss: 9.283351118938299e-07 2023-01-22 10:42:38.394346: step: 388/77, loss: 0.00021973026741761714 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.48717948717948717, 'r': 0.035916824196597356, 'f1': 0.06690140845070423}, 'combined': 0.04747841890049978, 'epoch': 21} Test Chinese: {'template': {'p': 0.9571428571428572, 'r': 0.5234375, 'f1': 0.6767676767676768}, 'slot': {'p': 0.65625, 'r': 0.020608439646712464, 'f1': 0.039961941008563276}, 'combined': 0.027044949975492318, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.48717948717948717, 'r': 0.035916824196597356, 'f1': 0.06690140845070423}, 'combined': 0.04747841890049978, 'epoch': 21} Test Korean: {'template': {'p': 0.9571428571428572, 'r': 0.5234375, 'f1': 0.6767676767676768}, 'slot': {'p': 0.65625, 'r': 0.020608439646712464, 'f1': 0.039961941008563276}, 'combined': 0.027044949975492318, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.48717948717948717, 'r': 0.035916824196597356, 'f1': 0.06690140845070423}, 'combined': 0.04747841890049978, 'epoch': 21} Test Russian: {'template': {'p': 0.9577464788732394, 'r': 0.53125, 'f1': 0.6834170854271355}, 'slot': {'p': 0.65625, 'r': 0.020608439646712464, 'f1': 0.039961941008563276}, 'combined': 0.027310673252083438, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 
'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 22 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 10:44:19.963190: step: 4/77, loss: 0.0007130326121114194 2023-01-22 10:44:21.342423: step: 8/77, loss: 0.004753975197672844 2023-01-22 10:44:22.776285: step: 12/77, loss: 0.001979820430278778 2023-01-22 10:44:24.211719: step: 16/77, loss: 0.0001369004021398723 2023-01-22 10:44:25.598853: step: 20/77, loss: 0.002938532270491123 2023-01-22 10:44:27.033394: step: 24/77, loss: 0.018064025789499283 2023-01-22 10:44:28.404175: step: 28/77, loss: 0.010816946625709534 2023-01-22 10:44:29.784913: step: 32/77, loss: 0.000314035132760182 2023-01-22 10:44:31.199019: step: 36/77, loss: 0.001084410585463047 2023-01-22 10:44:32.616008: step: 40/77, loss: 0.0005837613716721535 2023-01-22 10:44:34.085796: step: 44/77, loss: 0.00906037725508213 2023-01-22 10:44:35.517989: step: 48/77, loss: 0.00029241805896162987 2023-01-22 10:44:36.951960: step: 52/77, loss: 2.6940817406284623e-05 2023-01-22 10:44:38.361413: step: 56/77, loss: 4.1381859773537144e-05 2023-01-22 10:44:39.828376: step: 60/77, loss: 0.00024003698490560055 2023-01-22 10:44:41.271548: step: 64/77, loss: 0.01580897718667984 2023-01-22 10:44:42.749447: step: 68/77, loss: 2.7904839953407645e-05 2023-01-22 10:44:44.165129: step: 72/77, loss: 5.069902545074001e-05 2023-01-22 10:44:45.660118: step: 76/77, loss: 0.025134893134236336 2023-01-22 10:44:47.156259: step: 80/77, loss: 0.004092110786587 2023-01-22 10:44:48.545035: step: 84/77, loss: 0.0008029898745007813 2023-01-22 10:44:49.987249: step: 88/77, loss: 0.00046595188905484974 2023-01-22 10:44:51.402203: step: 92/77, loss: 0.0006075751734897494 2023-01-22 10:44:52.838681: step: 96/77, loss: 7.684613228775561e-05 2023-01-22 10:44:54.253953: step: 100/77, loss: 4.504282696871087e-05 2023-01-22 10:44:55.675801: step: 104/77, loss: 0.0023876773193478584 2023-01-22 
10:44:57.032621: step: 108/77, loss: 0.0033316421322524548 2023-01-22 10:44:58.477871: step: 112/77, loss: 0.002512221457436681 2023-01-22 10:44:59.972578: step: 116/77, loss: 0.0002792548621073365 2023-01-22 10:45:01.357656: step: 120/77, loss: 7.562458631582558e-05 2023-01-22 10:45:02.842267: step: 124/77, loss: 0.016864262521266937 2023-01-22 10:45:04.244715: step: 128/77, loss: 5.999497807351872e-05 2023-01-22 10:45:05.794333: step: 132/77, loss: 0.0022027171216905117 2023-01-22 10:45:07.210415: step: 136/77, loss: 0.04083377867937088 2023-01-22 10:45:08.642989: step: 140/77, loss: 0.0006076354184187949 2023-01-22 10:45:10.062465: step: 144/77, loss: 0.0026501461397856474 2023-01-22 10:45:11.501286: step: 148/77, loss: 4.051176802022383e-06 2023-01-22 10:45:12.916685: step: 152/77, loss: 2.730186315602623e-05 2023-01-22 10:45:14.321021: step: 156/77, loss: 0.0032909002620726824 2023-01-22 10:45:15.747043: step: 160/77, loss: 0.00011777214240282774 2023-01-22 10:45:17.177194: step: 164/77, loss: 0.0002647798683028668 2023-01-22 10:45:18.647348: step: 168/77, loss: 1.1578140401979908e-06 2023-01-22 10:45:20.048988: step: 172/77, loss: 0.00021434163500089198 2023-01-22 10:45:21.488231: step: 176/77, loss: 0.0002885766443796456 2023-01-22 10:45:22.869926: step: 180/77, loss: 0.0005006411811336875 2023-01-22 10:45:24.237869: step: 184/77, loss: 4.458000330487266e-05 2023-01-22 10:45:25.699640: step: 188/77, loss: 0.00045059213880449533 2023-01-22 10:45:27.130340: step: 192/77, loss: 0.02458992972970009 2023-01-22 10:45:28.560321: step: 196/77, loss: 2.822162969096098e-05 2023-01-22 10:45:29.975360: step: 200/77, loss: 3.4389731808914803e-06 2023-01-22 10:45:31.401787: step: 204/77, loss: 6.366405159496935e-06 2023-01-22 10:45:32.805494: step: 208/77, loss: 0.0012770395260304213 2023-01-22 10:45:34.212543: step: 212/77, loss: 0.00019756241817958653 2023-01-22 10:45:35.658213: step: 216/77, loss: 4.08826963393949e-06 2023-01-22 10:45:37.127320: step: 220/77, loss: 0.028113486245274544 2023-01-22 10:45:38.569281: step: 224/77, loss: 0.00014170001668389887 2023-01-22 10:45:39.978044: step: 228/77, loss: 9.80123604676919e-06 2023-01-22 10:45:41.429501: step: 232/77, loss: 0.0005905760335735977 2023-01-22 10:45:42.820619: step: 236/77, loss: 0.029797719791531563 2023-01-22 10:45:44.187196: step: 240/77, loss: 0.0002706180966924876 2023-01-22 10:45:45.602020: step: 244/77, loss: 0.0027564410120248795 2023-01-22 10:45:47.065240: step: 248/77, loss: 0.004286859650164843 2023-01-22 10:45:48.552205: step: 252/77, loss: 0.00023515461361967027 2023-01-22 10:45:49.999389: step: 256/77, loss: 0.009189371019601822 2023-01-22 10:45:51.421303: step: 260/77, loss: 4.962151433574036e-05 2023-01-22 10:45:52.867132: step: 264/77, loss: 3.2074090995592996e-05 2023-01-22 10:45:54.293643: step: 268/77, loss: 0.016157323494553566 2023-01-22 10:45:55.736024: step: 272/77, loss: 0.009273549541831017 2023-01-22 10:45:57.234419: step: 276/77, loss: 7.113243555068038e-06 2023-01-22 10:45:58.651476: step: 280/77, loss: 0.0007872179849073291 2023-01-22 10:46:00.070941: step: 284/77, loss: 1.5747938959975727e-05 2023-01-22 10:46:01.499237: step: 288/77, loss: 7.15578644303605e-05 2023-01-22 10:46:02.916696: step: 292/77, loss: 1.0132703209819738e-06 2023-01-22 10:46:04.294834: step: 296/77, loss: 6.1032983467157464e-06 2023-01-22 10:46:05.743426: step: 300/77, loss: 2.1115256458870135e-05 2023-01-22 10:46:07.159610: step: 304/77, loss: 0.0014195248950272799 2023-01-22 10:46:08.556383: step: 308/77, loss: 0.00951320305466652 
2023-01-22 10:46:09.959304: step: 312/77, loss: 0.0012874712701886892 2023-01-22 10:46:11.398800: step: 316/77, loss: 6.160401972010732e-05 2023-01-22 10:46:12.772660: step: 320/77, loss: 0.1084732934832573 2023-01-22 10:46:14.206085: step: 324/77, loss: 0.00011493593046907336 2023-01-22 10:46:15.635418: step: 328/77, loss: 0.007658337242901325 2023-01-22 10:46:17.028208: step: 332/77, loss: 0.0026278269942849874 2023-01-22 10:46:18.481256: step: 336/77, loss: 6.384636071743444e-05 2023-01-22 10:46:19.972563: step: 340/77, loss: 1.5949266526149586e-05 2023-01-22 10:46:21.453235: step: 344/77, loss: 0.07788742333650589 2023-01-22 10:46:22.847949: step: 348/77, loss: 8.078066457528621e-05 2023-01-22 10:46:24.270554: step: 352/77, loss: 0.00018889573402702808 2023-01-22 10:46:25.727456: step: 356/77, loss: 0.0018875160021707416 2023-01-22 10:46:27.204955: step: 360/77, loss: 1.9665896616061218e-05 2023-01-22 10:46:28.586404: step: 364/77, loss: 0.0001643340801820159 2023-01-22 10:46:30.074900: step: 368/77, loss: 0.005625385791063309 2023-01-22 10:46:31.514387: step: 372/77, loss: 0.00016497427714057267 2023-01-22 10:46:32.982000: step: 376/77, loss: 0.021070636808872223 2023-01-22 10:46:34.394066: step: 380/77, loss: 0.00030101914308033884 2023-01-22 10:46:35.819784: step: 384/77, loss: 5.654891356243752e-05 2023-01-22 10:46:37.306376: step: 388/77, loss: 8.754900591156911e-06 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.4418604651162791, 'r': 0.035916824196597356, 'f1': 0.06643356643356645}, 'combined': 0.04714640198511168, 'epoch': 22} Test Chinese: {'template': {'p': 0.9090909090909091, 'r': 0.546875, 'f1': 0.6829268292682926}, 'slot': {'p': 0.6111111111111112, 'r': 0.021589793915603533, 'f1': 0.04170616113744076}, 'combined': 0.028482256386544905, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.4418604651162791, 'r': 0.035916824196597356, 'f1': 0.06643356643356645}, 'combined': 0.04714640198511168, 'epoch': 22} Test Korean: {'template': {'p': 0.9090909090909091, 'r': 0.546875, 'f1': 0.6829268292682926}, 'slot': {'p': 0.6111111111111112, 'r': 0.021589793915603533, 'f1': 0.04170616113744076}, 'combined': 0.028482256386544905, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.4523809523809524, 'r': 0.035916824196597356, 'f1': 0.06654991243432576}, 'combined': 0.0472289701146828, 'epoch': 22} Test Russian: {'template': {'p': 0.9090909090909091, 'r': 0.546875, 'f1': 0.6829268292682926}, 'slot': {'p': 0.6, 'r': 0.020608439646712464, 'f1': 0.03984819734345351}, 'combined': 0.027213403063821907, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 
0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 23 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 10:48:19.316278: step: 4/77, loss: 9.152458369499072e-05 2023-01-22 10:48:20.711301: step: 8/77, loss: 0.0014891615137457848 2023-01-22 10:48:22.149790: step: 12/77, loss: 0.004348850343376398 2023-01-22 10:48:23.628933: step: 16/77, loss: 2.2956748580327258e-05 2023-01-22 10:48:25.041062: step: 20/77, loss: 0.00022393259860109538 2023-01-22 10:48:26.532853: step: 24/77, loss: 6.849241344752954e-06 2023-01-22 10:48:27.991442: step: 28/77, loss: 0.0009687441634014249 2023-01-22 10:48:29.389011: step: 32/77, loss: 4.574593788220227e-07 2023-01-22 10:48:30.815264: step: 36/77, loss: 0.0002080762351397425 2023-01-22 10:48:32.219212: step: 40/77, loss: 0.020183607935905457 2023-01-22 10:48:33.631442: step: 44/77, loss: 6.683401443297043e-05 2023-01-22 10:48:35.047574: step: 48/77, loss: 4.300964064896107e-05 2023-01-22 10:48:36.530317: step: 52/77, loss: 0.00010689069313229993 2023-01-22 10:48:37.918166: step: 56/77, loss: 7.621638815180631e-06 2023-01-22 10:48:39.306157: step: 60/77, loss: 0.005777846090495586 2023-01-22 10:48:40.675823: step: 64/77, loss: 1.0221141565125436e-05 2023-01-22 10:48:42.129522: step: 68/77, loss: 0.00042474266956560314 2023-01-22 10:48:43.565420: step: 72/77, loss: 0.0002089111803798005 2023-01-22 10:48:44.985681: step: 76/77, loss: 0.00219891220331192 2023-01-22 10:48:46.445609: step: 80/77, loss: 0.0003327025333419442 2023-01-22 10:48:48.004812: step: 84/77, loss: 1.5328430890804157e-05 2023-01-22 10:48:49.449814: step: 88/77, loss: 1.4556253518094309e-05 2023-01-22 10:48:50.932767: step: 
92/77, loss: 0.0012400572886690497 2023-01-22 10:48:52.331180: step: 96/77, loss: 1.1806205293396488e-05 2023-01-22 10:48:53.753757: step: 100/77, loss: 0.013861570507287979 2023-01-22 10:48:55.122113: step: 104/77, loss: 0.004015210084617138 2023-01-22 10:48:56.563829: step: 108/77, loss: 7.869948603911325e-05 2023-01-22 10:48:57.971511: step: 112/77, loss: 0.0006869456265121698 2023-01-22 10:48:59.407192: step: 116/77, loss: 8.663826520205475e-06 2023-01-22 10:49:00.782114: step: 120/77, loss: 0.0004616382939275354 2023-01-22 10:49:02.281041: step: 124/77, loss: 0.00424158526584506 2023-01-22 10:49:03.769930: step: 128/77, loss: 0.0019587939605116844 2023-01-22 10:49:05.177115: step: 132/77, loss: 7.3105504270643e-05 2023-01-22 10:49:06.575404: step: 136/77, loss: 1.5767673176014796e-05 2023-01-22 10:49:08.041120: step: 140/77, loss: 0.011566844768822193 2023-01-22 10:49:09.477031: step: 144/77, loss: 1.3639378266816493e-05 2023-01-22 10:49:10.970613: step: 148/77, loss: 2.9078797524562106e-05 2023-01-22 10:49:12.479063: step: 152/77, loss: 0.00010868874232983217 2023-01-22 10:49:13.976065: step: 156/77, loss: 0.012522750534117222 2023-01-22 10:49:15.423084: step: 160/77, loss: 4.022658686153591e-05 2023-01-22 10:49:16.881178: step: 164/77, loss: 9.945671627065167e-05 2023-01-22 10:49:18.322384: step: 168/77, loss: 0.00214787176810205 2023-01-22 10:49:19.745723: step: 172/77, loss: 3.362211646162905e-05 2023-01-22 10:49:21.186302: step: 176/77, loss: 1.410477125318721e-05 2023-01-22 10:49:22.673615: step: 180/77, loss: 1.957959284482058e-06 2023-01-22 10:49:24.094637: step: 184/77, loss: 3.4792228689184412e-06 2023-01-22 10:49:25.595466: step: 188/77, loss: 7.544868276454508e-05 2023-01-22 10:49:27.028316: step: 192/77, loss: 6.105268403189257e-05 2023-01-22 10:49:28.427437: step: 196/77, loss: 1.0859316716960166e-05 2023-01-22 10:49:29.825640: step: 200/77, loss: 0.006887755356729031 2023-01-22 10:49:31.256217: step: 204/77, loss: 0.0004280690918676555 2023-01-22 10:49:32.764518: step: 208/77, loss: 5.831807357026264e-06 2023-01-22 10:49:34.180672: step: 212/77, loss: 8.523413157490722e-07 2023-01-22 10:49:35.597189: step: 216/77, loss: 6.899658183101565e-05 2023-01-22 10:49:37.030424: step: 220/77, loss: 0.0049618808552622795 2023-01-22 10:49:38.526168: step: 224/77, loss: 0.00014697492588311434 2023-01-22 10:49:40.052577: step: 228/77, loss: 0.0004961665254086256 2023-01-22 10:49:41.516738: step: 232/77, loss: 5.026019789511338e-05 2023-01-22 10:49:43.036802: step: 236/77, loss: 0.0016431210096925497 2023-01-22 10:49:44.551104: step: 240/77, loss: 0.0011630343506112695 2023-01-22 10:49:45.998943: step: 244/77, loss: 0.0003789504407905042 2023-01-22 10:49:47.475493: step: 248/77, loss: 0.006341112311929464 2023-01-22 10:49:48.939841: step: 252/77, loss: 6.882879915792728e-06 2023-01-22 10:49:50.385558: step: 256/77, loss: 0.0005342121585272253 2023-01-22 10:49:51.813742: step: 260/77, loss: 0.0001719615829642862 2023-01-22 10:49:53.293101: step: 264/77, loss: 9.308056178269908e-05 2023-01-22 10:49:54.687185: step: 268/77, loss: 6.610100535908714e-05 2023-01-22 10:49:56.155282: step: 272/77, loss: 8.758709009271115e-05 2023-01-22 10:49:57.551487: step: 276/77, loss: 0.010416517965495586 2023-01-22 10:49:58.990568: step: 280/77, loss: 0.01606697589159012 2023-01-22 10:50:00.452330: step: 284/77, loss: 1.2740185866277898e-06 2023-01-22 10:50:01.912580: step: 288/77, loss: 1.1670215826597996e-05 2023-01-22 10:50:03.323557: step: 292/77, loss: 0.06407275795936584 2023-01-22 10:50:04.750452: 
step: 296/77, loss: 8.716026968613733e-06 2023-01-22 10:50:06.182099: step: 300/77, loss: 0.0001433848956367001 2023-01-22 10:50:07.592296: step: 304/77, loss: 4.845308285439387e-05 2023-01-22 10:50:09.088812: step: 308/77, loss: 8.231492392951623e-05 2023-01-22 10:50:10.580837: step: 312/77, loss: 0.0001697995758149773 2023-01-22 10:50:11.984838: step: 316/77, loss: 0.00019652053015306592 2023-01-22 10:50:13.482953: step: 320/77, loss: 0.0017496251966804266 2023-01-22 10:50:14.909025: step: 324/77, loss: 0.010774437338113785 2023-01-22 10:50:16.370491: step: 328/77, loss: 4.13789121012087e-06 2023-01-22 10:50:17.745439: step: 332/77, loss: 0.00026316073490306735 2023-01-22 10:50:19.177408: step: 336/77, loss: 0.0004801765608135611 2023-01-22 10:50:20.622983: step: 340/77, loss: 0.00015464633179362863 2023-01-22 10:50:22.033336: step: 344/77, loss: 1.1597515367611777e-05 2023-01-22 10:50:23.509664: step: 348/77, loss: 0.014501575380563736 2023-01-22 10:50:24.970276: step: 352/77, loss: 0.00031463519553653896 2023-01-22 10:50:26.398776: step: 356/77, loss: 2.0194192984490655e-05 2023-01-22 10:50:27.770288: step: 360/77, loss: 0.0047062234953045845 2023-01-22 10:50:29.202631: step: 364/77, loss: 0.003076509339734912 2023-01-22 10:50:30.648282: step: 368/77, loss: 7.674502558074892e-05 2023-01-22 10:50:32.088630: step: 372/77, loss: 0.0009569913381710649 2023-01-22 10:50:33.553348: step: 376/77, loss: 0.000321688101394102 2023-01-22 10:50:34.910029: step: 380/77, loss: 0.00011519218242028728 2023-01-22 10:50:36.370234: step: 384/77, loss: 2.3515976863563992e-05 2023-01-22 10:50:37.836482: step: 388/77, loss: 0.0040862769819796085 ================================================== Loss: 0.003 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4634146341463415, 'r': 0.035916824196597356, 'f1': 0.06666666666666668}, 'combined': 0.04912280701754387, 'epoch': 23} Test Chinese: {'template': {'p': 0.9452054794520548, 'r': 0.5390625, 'f1': 0.6865671641791045}, 'slot': {'p': 0.58, 'r': 0.02845927379784102, 'f1': 0.05425631431244154}, 'combined': 0.03725060385630315, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.48717948717948717, 'r': 0.035916824196597356, 'f1': 0.06690140845070423}, 'combined': 0.04839676356008391, 'epoch': 23} Test Korean: {'template': {'p': 0.9324324324324325, 'r': 0.5390625, 'f1': 0.6831683168316832}, 'slot': {'p': 0.56, 'r': 0.02747791952894995, 'f1': 0.05238540692235734}, 'combined': 0.03578805027368967, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4634146341463415, 'r': 0.035916824196597356, 'f1': 0.06666666666666668}, 'combined': 0.04912280701754387, 'epoch': 23} Test Russian: {'template': {'p': 0.9324324324324325, 'r': 0.5390625, 'f1': 0.6831683168316832}, 'slot': {'p': 0.5686274509803921, 'r': 0.02845927379784102, 'f1': 0.05420560747663552}, 'combined': 0.037031553622651994, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 
'combined': 0.04301075268817204, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 24 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 10:52:23.831653: step: 4/77, loss: 0.00016842114564497024 2023-01-22 10:52:25.218217: step: 8/77, loss: 0.00707374420017004 2023-01-22 10:52:26.741975: step: 12/77, loss: 9.162640708382241e-06 2023-01-22 10:52:28.120703: step: 16/77, loss: 6.259590008994564e-05 2023-01-22 10:52:29.581619: step: 20/77, loss: 5.2402078836166766e-06 2023-01-22 10:52:31.023805: step: 24/77, loss: 8.415575393883046e-06 2023-01-22 10:52:32.475884: step: 28/77, loss: 0.006951048504561186 2023-01-22 10:52:33.890093: step: 32/77, loss: 3.4868426723733137e-07 2023-01-22 10:52:35.261242: step: 36/77, loss: 7.024884325801395e-06 2023-01-22 10:52:36.690290: step: 40/77, loss: 0.004366460256278515 2023-01-22 10:52:38.165441: step: 44/77, loss: 0.0004859516047872603 2023-01-22 10:52:39.591955: step: 48/77, loss: 0.00039076071698218584 2023-01-22 10:52:41.008978: step: 52/77, loss: 4.046229878440499e-05 2023-01-22 10:52:42.412844: step: 56/77, loss: 0.0024879940319806337 2023-01-22 10:52:43.869884: step: 60/77, loss: 0.0001154499186668545 2023-01-22 10:52:45.299345: step: 64/77, loss: 0.0010849382961168885 2023-01-22 10:52:46.766644: step: 68/77, loss: 0.05313294753432274 2023-01-22 10:52:48.244341: step: 72/77, loss: 0.00030986842466518283 2023-01-22 10:52:49.743496: 
step: 76/77, loss: 0.006430521607398987 2023-01-22 10:52:51.210449: step: 80/77, loss: 0.0293259434401989 2023-01-22 10:52:52.635481: step: 84/77, loss: 0.018152793869376183 2023-01-22 10:52:54.106657: step: 88/77, loss: 0.0007140887901186943 2023-01-22 10:52:55.554568: step: 92/77, loss: 0.00023382938525173813 2023-01-22 10:52:56.971237: step: 96/77, loss: 8.996458200272173e-05 2023-01-22 10:52:58.426549: step: 100/77, loss: 8.120103302644566e-06 2023-01-22 10:52:59.920754: step: 104/77, loss: 0.0007067503174766898 2023-01-22 10:53:01.348758: step: 108/77, loss: 0.000819536333438009 2023-01-22 10:53:02.795815: step: 112/77, loss: 5.364386197470594e-07 2023-01-22 10:53:04.314373: step: 116/77, loss: 5.002348552807234e-05 2023-01-22 10:53:05.749889: step: 120/77, loss: 0.007737953215837479 2023-01-22 10:53:07.246000: step: 124/77, loss: 0.0038814896252006292 2023-01-22 10:53:08.618785: step: 128/77, loss: 0.0030426017474383116 2023-01-22 10:53:10.137012: step: 132/77, loss: 0.00019459401664789766 2023-01-22 10:53:11.554319: step: 136/77, loss: 4.80973903904669e-05 2023-01-22 10:53:13.028064: step: 140/77, loss: 0.00020540988771244884 2023-01-22 10:53:14.512971: step: 144/77, loss: 9.273541218135506e-05 2023-01-22 10:53:15.944741: step: 148/77, loss: 7.823100531823002e-06 2023-01-22 10:53:17.386198: step: 152/77, loss: 7.632689630554523e-06 2023-01-22 10:53:18.827077: step: 156/77, loss: 7.687011020607315e-06 2023-01-22 10:53:20.283862: step: 160/77, loss: 0.002562094945460558 2023-01-22 10:53:21.741845: step: 164/77, loss: 0.002456388669088483 2023-01-22 10:53:23.138013: step: 168/77, loss: 4.518212517723441e-05 2023-01-22 10:53:24.527135: step: 172/77, loss: 2.0654457330238074e-05 2023-01-22 10:53:26.013494: step: 176/77, loss: 0.001107532880268991 2023-01-22 10:53:27.499617: step: 180/77, loss: 4.694052768172696e-05 2023-01-22 10:53:28.989456: step: 184/77, loss: 4.738108600577107e-06 2023-01-22 10:53:30.415210: step: 188/77, loss: 4.7500452637905255e-06 2023-01-22 10:53:31.906906: step: 192/77, loss: 0.00016000482719391584 2023-01-22 10:53:33.331997: step: 196/77, loss: 9.928902727551758e-05 2023-01-22 10:53:34.817288: step: 200/77, loss: 1.9417080693528987e-05 2023-01-22 10:53:36.324076: step: 204/77, loss: 0.015278271399438381 2023-01-22 10:53:37.811761: step: 208/77, loss: 3.050275699933991e-05 2023-01-22 10:53:39.280649: step: 212/77, loss: 5.545574822463095e-06 2023-01-22 10:53:40.764572: step: 216/77, loss: 0.00744546577334404 2023-01-22 10:53:42.198456: step: 220/77, loss: 0.0001620856928639114 2023-01-22 10:53:43.592131: step: 224/77, loss: 4.286890998628223e-06 2023-01-22 10:53:45.089224: step: 228/77, loss: 0.0006224351236596704 2023-01-22 10:53:46.537937: step: 232/77, loss: 1.8760068769552163e-06 2023-01-22 10:53:47.945213: step: 236/77, loss: 0.04302788898348808 2023-01-22 10:53:49.371311: step: 240/77, loss: 0.009987486526370049 2023-01-22 10:53:50.877757: step: 244/77, loss: 2.8072849090676755e-05 2023-01-22 10:53:52.390366: step: 248/77, loss: 4.380669906822732e-06 2023-01-22 10:53:53.856079: step: 252/77, loss: 0.00025404879124835134 2023-01-22 10:53:55.299728: step: 256/77, loss: 0.0018334381747990847 2023-01-22 10:53:56.761645: step: 260/77, loss: 1.198196696350351e-05 2023-01-22 10:53:58.231746: step: 264/77, loss: 1.776934186636936e-05 2023-01-22 10:53:59.775211: step: 268/77, loss: 3.260213816247415e-06 2023-01-22 10:54:01.246122: step: 272/77, loss: 0.00028547868714667857 2023-01-22 10:54:02.700692: step: 276/77, loss: 3.34371070493944e-05 2023-01-22 
10:54:04.169993: step: 280/77, loss: 3.5840192140312865e-05 2023-01-22 10:54:05.555663: step: 284/77, loss: 0.00037121682544238865 2023-01-22 10:54:07.023820: step: 288/77, loss: 0.0007780568557791412 2023-01-22 10:54:08.462638: step: 292/77, loss: 0.00021716710762120783 2023-01-22 10:54:09.970129: step: 296/77, loss: 0.009425071999430656 2023-01-22 10:54:11.422570: step: 300/77, loss: 0.012034622021019459 2023-01-22 10:54:12.836300: step: 304/77, loss: 0.002567733870819211 2023-01-22 10:54:14.338593: step: 308/77, loss: 6.655389006482437e-05 2023-01-22 10:54:15.792201: step: 312/77, loss: 4.360734601505101e-05 2023-01-22 10:54:17.182850: step: 316/77, loss: 0.002691005589440465 2023-01-22 10:54:18.636394: step: 320/77, loss: 0.0007407565717585385 2023-01-22 10:54:20.071361: step: 324/77, loss: 1.1256095604039729e-05 2023-01-22 10:54:21.510057: step: 328/77, loss: 0.003093632636591792 2023-01-22 10:54:22.981539: step: 332/77, loss: 1.4860575902275741e-05 2023-01-22 10:54:24.411427: step: 336/77, loss: 0.00033171725226566195 2023-01-22 10:54:25.830817: step: 340/77, loss: 1.877544093531469e-07 2023-01-22 10:54:27.250426: step: 344/77, loss: 0.015202153474092484 2023-01-22 10:54:28.665398: step: 348/77, loss: 1.6539713669772027e-06 2023-01-22 10:54:30.106496: step: 352/77, loss: 0.0009299773373641074 2023-01-22 10:54:31.573799: step: 356/77, loss: 0.00010895145533140749 2023-01-22 10:54:33.017524: step: 360/77, loss: 0.013841778971254826 2023-01-22 10:54:34.470002: step: 364/77, loss: 1.5914132745820098e-05 2023-01-22 10:54:35.910649: step: 368/77, loss: 0.0004517743072938174 2023-01-22 10:54:37.405251: step: 372/77, loss: 0.00041712226811796427 2023-01-22 10:54:38.810702: step: 376/77, loss: 5.8034638641402125e-06 2023-01-22 10:54:40.218036: step: 380/77, loss: 0.00019174578483216465 2023-01-22 10:54:41.699550: step: 384/77, loss: 0.0004269965284038335 2023-01-22 10:54:43.162013: step: 388/77, loss: 5.146474904904608e-06 ================================================== Loss: 0.003 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.4444444444444444, 'r': 0.03780718336483932, 'f1': 0.06968641114982578}, 'combined': 0.04945487242890862, 'epoch': 24} Test Chinese: {'template': {'p': 0.922077922077922, 'r': 0.5546875, 'f1': 0.6926829268292682}, 'slot': {'p': 0.5833333333333334, 'r': 0.020608439646712464, 'f1': 0.03981042654028436}, 'combined': 0.02757600277424575, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.4444444444444444, 'r': 0.03780718336483932, 'f1': 0.06968641114982578}, 'combined': 0.04945487242890862, 'epoch': 24} Test Korean: {'template': {'p': 0.922077922077922, 'r': 0.5546875, 'f1': 0.6926829268292682}, 'slot': {'p': 0.5588235294117647, 'r': 0.018645731108930325, 'f1': 0.03608736942070276}, 'combined': 0.024997104671901423, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.4444444444444444, 'r': 0.03780718336483932, 'f1': 0.06968641114982578}, 'combined': 0.04945487242890862, 'epoch': 24} Test Russian: {'template': {'p': 0.922077922077922, 'r': 0.5546875, 'f1': 0.6926829268292682}, 'slot': {'p': 0.5833333333333334, 'r': 0.020608439646712464, 'f1': 0.03981042654028436}, 'combined': 0.02757600277424575, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 24} 
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 25 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 10:56:27.362575: step: 4/77, loss: 0.030350100249052048 2023-01-22 10:56:28.746127: step: 8/77, loss: 3.5907471556129167e-06 2023-01-22 10:56:30.148346: step: 12/77, loss: 0.0317850299179554 2023-01-22 10:56:31.656466: step: 16/77, loss: 7.020366319920868e-05 2023-01-22 10:56:33.114681: step: 20/77, loss: 2.1531625407078536e-06 2023-01-22 10:56:34.554985: step: 24/77, loss: 0.0002119422279065475 2023-01-22 10:56:36.023693: step: 28/77, loss: 2.0130776192672784e-06 2023-01-22 10:56:37.437298: step: 32/77, loss: 1.1276445547991898e-05 2023-01-22 10:56:38.849018: step: 36/77, loss: 0.00117277551908046 2023-01-22 10:56:40.287866: step: 40/77, loss: 0.002095460193231702 2023-01-22 10:56:41.717029: step: 44/77, loss: 0.002477210946381092 2023-01-22 10:56:43.236421: step: 48/77, loss: 0.00013856086297892034 2023-01-22 10:56:44.661117: step: 52/77, loss: 0.00027021500864066184 2023-01-22 10:56:46.144364: step: 56/77, loss: 0.0034453249536454678 2023-01-22 
10:56:47.637988: step: 60/77, loss: 6.887513791298261e-06 2023-01-22 10:56:49.094019: step: 64/77, loss: 1.2799639534932794e-06 2023-01-22 10:56:50.488454: step: 68/77, loss: 0.041449591517448425 2023-01-22 10:56:51.990138: step: 72/77, loss: 9.353303903480992e-05 2023-01-22 10:56:53.412668: step: 76/77, loss: 1.1487300071166828e-05 2023-01-22 10:56:54.800068: step: 80/77, loss: 0.0002724641526583582 2023-01-22 10:56:56.243866: step: 84/77, loss: 0.0012550866231322289 2023-01-22 10:56:57.734382: step: 88/77, loss: 0.0004702557052951306 2023-01-22 10:56:59.234106: step: 92/77, loss: 5.4410127631854266e-05 2023-01-22 10:57:00.658550: step: 96/77, loss: 0.0006709980079904199 2023-01-22 10:57:02.113358: step: 100/77, loss: 0.001236169831827283 2023-01-22 10:57:03.579524: step: 104/77, loss: 3.693071994348429e-05 2023-01-22 10:57:05.018745: step: 108/77, loss: 0.0002465422439854592 2023-01-22 10:57:06.507165: step: 112/77, loss: 0.0004932255251333117 2023-01-22 10:57:07.910092: step: 116/77, loss: 0.00036463479045778513 2023-01-22 10:57:09.321593: step: 120/77, loss: 5.476816659211181e-05 2023-01-22 10:57:10.765388: step: 124/77, loss: 2.9448181521729566e-05 2023-01-22 10:57:12.218245: step: 128/77, loss: 0.0026145465672016144 2023-01-22 10:57:13.692364: step: 132/77, loss: 0.03979451209306717 2023-01-22 10:57:15.196104: step: 136/77, loss: 3.248447058012971e-07 2023-01-22 10:57:16.646795: step: 140/77, loss: 0.0006305769784376025 2023-01-22 10:57:18.123406: step: 144/77, loss: 5.022615732741542e-05 2023-01-22 10:57:19.565863: step: 148/77, loss: 0.01937961019575596 2023-01-22 10:57:21.072272: step: 152/77, loss: 1.3635562027047854e-05 2023-01-22 10:57:22.526762: step: 156/77, loss: 7.278964403667487e-06 2023-01-22 10:57:23.937919: step: 160/77, loss: 6.216685142135248e-05 2023-01-22 10:57:25.407319: step: 164/77, loss: 1.2665967119573907e-07 2023-01-22 10:57:26.880585: step: 168/77, loss: 0.011341569945216179 2023-01-22 10:57:28.312059: step: 172/77, loss: 1.9737020920729265e-05 2023-01-22 10:57:29.775187: step: 176/77, loss: 5.379262120186468e-07 2023-01-22 10:57:31.234575: step: 180/77, loss: 3.039822900063882e-07 2023-01-22 10:57:32.786599: step: 184/77, loss: 0.0010536059271544218 2023-01-22 10:57:34.249681: step: 188/77, loss: 7.659011771465885e-07 2023-01-22 10:57:35.701742: step: 192/77, loss: 0.00901062786579132 2023-01-22 10:57:37.132932: step: 196/77, loss: 0.0001946162956301123 2023-01-22 10:57:38.666843: step: 200/77, loss: 0.001380533562041819 2023-01-22 10:57:40.102008: step: 204/77, loss: 2.5109788111876696e-05 2023-01-22 10:57:41.582155: step: 208/77, loss: 1.0168966582568828e-05 2023-01-22 10:57:43.021124: step: 212/77, loss: 8.078421524260193e-05 2023-01-22 10:57:44.447759: step: 216/77, loss: 0.004088700283318758 2023-01-22 10:57:45.911611: step: 220/77, loss: 2.3739892640151083e-05 2023-01-22 10:57:47.342037: step: 224/77, loss: 0.0006305679562501609 2023-01-22 10:57:48.748333: step: 228/77, loss: 0.0006910674856044352 2023-01-22 10:57:50.193100: step: 232/77, loss: 0.000501536822412163 2023-01-22 10:57:51.600460: step: 236/77, loss: 0.00013173221668694168 2023-01-22 10:57:53.048850: step: 240/77, loss: 0.01899598352611065 2023-01-22 10:57:54.486077: step: 244/77, loss: 5.1375267503317446e-05 2023-01-22 10:57:55.880977: step: 248/77, loss: 8.276186417788267e-05 2023-01-22 10:57:57.376677: step: 252/77, loss: 0.0019188302103430033 2023-01-22 10:57:58.932909: step: 256/77, loss: 1.1533292081367108e-06 2023-01-22 10:58:00.406613: step: 260/77, loss: 3.2556363294133916e-06 
2023-01-22 10:58:01.874755: step: 264/77, loss: 4.795873974217102e-05 2023-01-22 10:58:03.360479: step: 268/77, loss: 6.93992551532574e-05 2023-01-22 10:58:04.840398: step: 272/77, loss: 0.07327759265899658 2023-01-22 10:58:06.263086: step: 276/77, loss: 0.0050186216831207275 2023-01-22 10:58:07.700133: step: 280/77, loss: 1.8968220274473424e-06 2023-01-22 10:58:09.188381: step: 284/77, loss: 1.6182351600946276e-06 2023-01-22 10:58:10.669658: step: 288/77, loss: 0.007353936322033405 2023-01-22 10:58:12.197246: step: 292/77, loss: 2.384172717029287e-07 2023-01-22 10:58:13.665857: step: 296/77, loss: 0.00028624635888263583 2023-01-22 10:58:15.065062: step: 300/77, loss: 0.000721846881788224 2023-01-22 10:58:16.515941: step: 304/77, loss: 2.752255750237964e-05 2023-01-22 10:58:17.924685: step: 308/77, loss: 0.011221768334507942 2023-01-22 10:58:19.395087: step: 312/77, loss: 8.016594392756815e-07 2023-01-22 10:58:20.876605: step: 316/77, loss: 0.0003219220379833132 2023-01-22 10:58:22.334584: step: 320/77, loss: 0.000156089459778741 2023-01-22 10:58:23.813187: step: 324/77, loss: 5.267148299026303e-05 2023-01-22 10:58:25.220614: step: 328/77, loss: 1.6732965377741493e-06 2023-01-22 10:58:26.674595: step: 332/77, loss: 7.914335583336651e-05 2023-01-22 10:58:28.100127: step: 336/77, loss: 0.0009551231632940471 2023-01-22 10:58:29.601531: step: 340/77, loss: 4.798146164830541e-07 2023-01-22 10:58:31.009851: step: 344/77, loss: 8.940444331528852e-07 2023-01-22 10:58:32.483673: step: 348/77, loss: 3.7869740481255576e-05 2023-01-22 10:58:33.880476: step: 352/77, loss: 0.000730576110072434 2023-01-22 10:58:35.332775: step: 356/77, loss: 9.28007375478046e-06 2023-01-22 10:58:36.751410: step: 360/77, loss: 0.011991672217845917 2023-01-22 10:58:38.221774: step: 364/77, loss: 5.795432662125677e-05 2023-01-22 10:58:39.664790: step: 368/77, loss: 2.2798698751103075e-07 2023-01-22 10:58:41.159341: step: 372/77, loss: 2.812264210660942e-05 2023-01-22 10:58:42.585828: step: 376/77, loss: 0.02710641361773014 2023-01-22 10:58:44.044142: step: 380/77, loss: 1.861944838310592e-05 2023-01-22 10:58:45.495988: step: 384/77, loss: 0.0006321167456917465 2023-01-22 10:58:46.853561: step: 388/77, loss: 0.00018395755614619702 ================================================== Loss: 0.004 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.050676305101166295, 'epoch': 25} Test Chinese: {'template': {'p': 0.9342105263157895, 'r': 0.5546875, 'f1': 0.6960784313725491}, 'slot': {'p': 0.6216216216216216, 'r': 0.022571148184494603, 'f1': 0.043560606060606064}, 'combined': 0.030321598336304226, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.050676305101166295, 'epoch': 25} Test Korean: {'template': {'p': 0.9333333333333333, 'r': 0.546875, 'f1': 0.689655172413793}, 'slot': {'p': 0.5675675675675675, 'r': 0.020608439646712464, 'f1': 0.03977272727272728}, 'combined': 0.0274294670846395, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.050676305101166295, 'epoch': 25} Test Russian: {'template': {'p': 0.9342105263157895, 'r': 0.5546875, 'f1': 0.6960784313725491}, 
'slot': {'p': 0.5897435897435898, 'r': 0.022571148184494603, 'f1': 0.04347826086956522}, 'combined': 0.030264279624893444, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 26 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 11:00:31.011549: step: 4/77, loss: 1.8648388504516333e-05 2023-01-22 11:00:32.479769: step: 8/77, loss: 0.003614019602537155 2023-01-22 11:00:33.995096: step: 12/77, loss: 0.0036987748462706804 2023-01-22 11:00:35.401672: step: 16/77, loss: 8.188489118765574e-06 2023-01-22 11:00:36.806947: step: 20/77, loss: 0.00017946727166417986 2023-01-22 11:00:38.284941: step: 24/77, loss: 1.4870868653815705e-05 2023-01-22 11:00:39.728234: step: 28/77, loss: 0.0009488090872764587 2023-01-22 11:00:41.209571: step: 32/77, loss: 4.1778181184781715e-05 2023-01-22 11:00:42.719131: step: 36/77, loss: 0.0010264035081490874 2023-01-22 11:00:44.196308: step: 
40/77, loss: 7.91398124420084e-05 2023-01-22 11:00:45.571034: step: 44/77, loss: 8.268310921266675e-06 2023-01-22 11:00:47.018239: step: 48/77, loss: 2.724805563047994e-05 2023-01-22 11:00:48.517296: step: 52/77, loss: 6.878773274365813e-05 2023-01-22 11:00:49.954081: step: 56/77, loss: 2.7371258966013556e-06 2023-01-22 11:00:51.415056: step: 60/77, loss: 1.9934646843466908e-05 2023-01-22 11:00:52.880358: step: 64/77, loss: 0.00021441919670905918 2023-01-22 11:00:54.341059: step: 68/77, loss: 0.0003429249918553978 2023-01-22 11:00:55.759084: step: 72/77, loss: 3.063391432078788e-06 2023-01-22 11:00:57.194004: step: 76/77, loss: 7.694316082051955e-06 2023-01-22 11:00:58.638174: step: 80/77, loss: 0.0010704833548516035 2023-01-22 11:01:00.075552: step: 84/77, loss: 0.000800688227172941 2023-01-22 11:01:01.495758: step: 88/77, loss: 0.00019355997210368514 2023-01-22 11:01:02.980667: step: 92/77, loss: 8.405933840549551e-06 2023-01-22 11:01:04.376266: step: 96/77, loss: 1.4901097245001438e-07 2023-01-22 11:01:05.848964: step: 100/77, loss: 3.2093255867948756e-05 2023-01-22 11:01:07.343350: step: 104/77, loss: 1.7448739981773542e-06 2023-01-22 11:01:08.705070: step: 108/77, loss: 7.703179107920732e-06 2023-01-22 11:01:10.090403: step: 112/77, loss: 0.001553960028104484 2023-01-22 11:01:11.540070: step: 116/77, loss: 8.19562373521876e-08 2023-01-22 11:01:12.989911: step: 120/77, loss: 7.058346091071144e-05 2023-01-22 11:01:14.432515: step: 124/77, loss: 0.0005340041243471205 2023-01-22 11:01:15.962438: step: 128/77, loss: 0.018589282408356667 2023-01-22 11:01:17.458382: step: 132/77, loss: 0.0001769806258380413 2023-01-22 11:01:19.001353: step: 136/77, loss: 1.1324689239700092e-06 2023-01-22 11:01:20.446604: step: 140/77, loss: 0.0002581391599960625 2023-01-22 11:01:21.976795: step: 144/77, loss: 0.0298842191696167 2023-01-22 11:01:23.401162: step: 148/77, loss: 3.725285324662764e-08 2023-01-22 11:01:24.840549: step: 152/77, loss: 0.0011672412510961294 2023-01-22 11:01:26.303983: step: 156/77, loss: 6.6999982664128765e-06 2023-01-22 11:01:27.719227: step: 160/77, loss: 0.0001974831393454224 2023-01-22 11:01:29.108787: step: 164/77, loss: 0.0011818100465461612 2023-01-22 11:01:30.644362: step: 168/77, loss: 0.12559227645397186 2023-01-22 11:01:32.101167: step: 172/77, loss: 0.00018192394054494798 2023-01-22 11:01:33.573017: step: 176/77, loss: 3.462537642917596e-05 2023-01-22 11:01:35.086387: step: 180/77, loss: 1.5296547644538805e-05 2023-01-22 11:01:36.485572: step: 184/77, loss: 3.6714420275529847e-05 2023-01-22 11:01:37.907458: step: 188/77, loss: 0.00013234214566182345 2023-01-22 11:01:39.351706: step: 192/77, loss: 3.090263362537371e-06 2023-01-22 11:01:40.803293: step: 196/77, loss: 0.014518280513584614 2023-01-22 11:01:42.232548: step: 200/77, loss: 0.000251075136475265 2023-01-22 11:01:43.659071: step: 204/77, loss: 0.04231609031558037 2023-01-22 11:01:45.089352: step: 208/77, loss: 2.1233238385320874e-06 2023-01-22 11:01:46.482504: step: 212/77, loss: 3.2484297207702184e-07 2023-01-22 11:01:48.012272: step: 216/77, loss: 3.600172931328416e-05 2023-01-22 11:01:49.433726: step: 220/77, loss: 2.236615046058432e-06 2023-01-22 11:01:50.920567: step: 224/77, loss: 4.7323348553618416e-05 2023-01-22 11:01:52.305457: step: 228/77, loss: 0.0005846671992912889 2023-01-22 11:01:53.810988: step: 232/77, loss: 3.072257413805346e-06 2023-01-22 11:01:55.309381: step: 236/77, loss: 9.84782527666539e-05 2023-01-22 11:01:56.704619: step: 240/77, loss: 0.004049063194543123 2023-01-22 11:01:58.203235: step: 
244/77, loss: 6.586230369975965e-07 2023-01-22 11:01:59.653872: step: 248/77, loss: 0.0015713156899437308 2023-01-22 11:02:01.104303: step: 252/77, loss: 1.6120588043122552e-05 2023-01-22 11:02:02.558644: step: 256/77, loss: 1.328440794168273e-05 2023-01-22 11:02:04.061163: step: 260/77, loss: 0.0005180090083740652 2023-01-22 11:02:05.503207: step: 264/77, loss: 0.0010391356190666556 2023-01-22 11:02:06.853936: step: 268/77, loss: 3.50178343069274e-05 2023-01-22 11:02:08.255314: step: 272/77, loss: 2.1308568420863594e-07 2023-01-22 11:02:09.713469: step: 276/77, loss: 0.0014618869172409177 2023-01-22 11:02:11.167438: step: 280/77, loss: 0.0007588414591737092 2023-01-22 11:02:12.637801: step: 284/77, loss: 0.06243320554494858 2023-01-22 11:02:14.067866: step: 288/77, loss: 9.618224794394337e-06 2023-01-22 11:02:15.517103: step: 292/77, loss: 9.983750715036876e-08 2023-01-22 11:02:16.940579: step: 296/77, loss: 0.00012859278649557382 2023-01-22 11:02:18.409116: step: 300/77, loss: 2.0861565985796915e-07 2023-01-22 11:02:19.830595: step: 304/77, loss: 4.0165257814805955e-05 2023-01-22 11:02:21.310757: step: 308/77, loss: 1.2351800251053646e-05 2023-01-22 11:02:22.794954: step: 312/77, loss: 2.354362891310302e-07 2023-01-22 11:02:24.260628: step: 316/77, loss: 0.012203582562506199 2023-01-22 11:02:25.729832: step: 320/77, loss: 0.05889744311571121 2023-01-22 11:02:27.158690: step: 324/77, loss: 7.20866046322044e-06 2023-01-22 11:02:28.581570: step: 328/77, loss: 1.6941910416790051e-06 2023-01-22 11:02:29.972295: step: 332/77, loss: 0.008220874704420567 2023-01-22 11:02:31.431923: step: 336/77, loss: 0.02381768450140953 2023-01-22 11:02:32.940661: step: 340/77, loss: 3.486860578050255e-07 2023-01-22 11:02:34.409617: step: 344/77, loss: 2.9442082905006828e-06 2023-01-22 11:02:35.847494: step: 348/77, loss: 0.0016949096461758018 2023-01-22 11:02:37.262256: step: 352/77, loss: 4.277956577425357e-06 2023-01-22 11:02:38.728266: step: 356/77, loss: 2.846114171006775e-07 2023-01-22 11:02:40.194192: step: 360/77, loss: 7.524974989792099e-07 2023-01-22 11:02:41.654295: step: 364/77, loss: 5.088774560135789e-05 2023-01-22 11:02:43.144915: step: 368/77, loss: 0.0001697741390671581 2023-01-22 11:02:44.565268: step: 372/77, loss: 0.009770647622644901 2023-01-22 11:02:46.026036: step: 376/77, loss: 0.00020992070494685322 2023-01-22 11:02:47.422155: step: 380/77, loss: 3.278218798641319e-07 2023-01-22 11:02:48.873353: step: 384/77, loss: 2.631887764437124e-05 2023-01-22 11:02:50.283769: step: 388/77, loss: 0.000318748876452446 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05161766061388146, 'epoch': 26} Test Chinese: {'template': {'p': 0.9558823529411765, 'r': 0.5078125, 'f1': 0.6632653061224489}, 'slot': {'p': 0.6470588235294118, 'r': 0.021589793915603533, 'f1': 0.04178537511870845}, 'combined': 0.02771478961955152, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05161766061388146, 'epoch': 26} Test Korean: {'template': {'p': 0.9558823529411765, 'r': 0.5078125, 'f1': 0.6632653061224489}, 'slot': {'p': 0.6470588235294118, 'r': 0.021589793915603533, 'f1': 0.04178537511870845}, 'combined': 0.02771478961955152, 'epoch': 26} Dev 
Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05161766061388146, 'epoch': 26} Test Russian: {'template': {'p': 0.9558823529411765, 'r': 0.5078125, 'f1': 0.6632653061224489}, 'slot': {'p': 0.6470588235294118, 'r': 0.021589793915603533, 'f1': 0.04178537511870845}, 'combined': 0.02771478961955152, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 26} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 27 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 11:04:34.462539: step: 4/77, loss: 3.606061227401369e-07 2023-01-22 11:04:35.914718: step: 8/77, loss: 2.7565567961573834e-06 2023-01-22 11:04:37.418298: step: 12/77, loss: 1.0385904261056567e-06 2023-01-22 11:04:38.901668: step: 16/77, loss: 7.437886961270124e-05 2023-01-22 11:04:40.290159: step: 20/77, loss: 
9.661864896770567e-05 2023-01-22 11:04:41.730613: step: 24/77, loss: 3.726641079992987e-05 2023-01-22 11:04:43.198396: step: 28/77, loss: 5.066294761491008e-07 2023-01-22 11:04:44.615914: step: 32/77, loss: 1.8862918295781128e-05 2023-01-22 11:04:46.065387: step: 36/77, loss: 1.8645439922693186e-05 2023-01-22 11:04:47.476102: step: 40/77, loss: 9.87338262348203e-06 2023-01-22 11:04:48.934498: step: 44/77, loss: 0.01699930801987648 2023-01-22 11:04:50.355969: step: 48/77, loss: 1.1026840951444683e-07 2023-01-22 11:04:51.810721: step: 52/77, loss: 0.0002957044052891433 2023-01-22 11:04:53.268023: step: 56/77, loss: 1.7434240362490527e-07 2023-01-22 11:04:54.686465: step: 60/77, loss: 0.00021177512826398015 2023-01-22 11:04:56.085196: step: 64/77, loss: 3.229175854357891e-05 2023-01-22 11:04:57.557323: step: 68/77, loss: 0.046003349125385284 2023-01-22 11:04:59.028009: step: 72/77, loss: 2.8995225420658244e-06 2023-01-22 11:05:00.480893: step: 76/77, loss: 0.1521517038345337 2023-01-22 11:05:02.002401: step: 80/77, loss: 0.0002786411496344954 2023-01-22 11:05:03.453623: step: 84/77, loss: 1.475212059176556e-07 2023-01-22 11:05:04.869011: step: 88/77, loss: 1.1026831714389118e-07 2023-01-22 11:05:06.275587: step: 92/77, loss: 0.0024277365300804377 2023-01-22 11:05:07.711094: step: 96/77, loss: 0.0009051822707988322 2023-01-22 11:05:09.148945: step: 100/77, loss: 0.006404219660907984 2023-01-22 11:05:10.615320: step: 104/77, loss: 0.10981199145317078 2023-01-22 11:05:12.117831: step: 108/77, loss: 0.006981880869716406 2023-01-22 11:05:13.601962: step: 112/77, loss: 0.0007049435516819358 2023-01-22 11:05:15.047122: step: 116/77, loss: 0.0004604582500178367 2023-01-22 11:05:16.605389: step: 120/77, loss: 0.00011910688772331923 2023-01-22 11:05:18.051228: step: 124/77, loss: 1.693453850748483e-05 2023-01-22 11:05:19.571250: step: 128/77, loss: 1.7974909496842884e-05 2023-01-22 11:05:20.981969: step: 132/77, loss: 4.464502490009181e-05 2023-01-22 11:05:22.415216: step: 136/77, loss: 0.0034254807978868484 2023-01-22 11:05:23.953144: step: 140/77, loss: 5.8066685596713796e-05 2023-01-22 11:05:25.403307: step: 144/77, loss: 0.006067907437682152 2023-01-22 11:05:26.860687: step: 148/77, loss: 0.00030065476312302053 2023-01-22 11:05:28.271953: step: 152/77, loss: 4.232639548717998e-05 2023-01-22 11:05:29.720288: step: 156/77, loss: 2.042884716502158e-06 2023-01-22 11:05:31.153557: step: 160/77, loss: 0.02161801978945732 2023-01-22 11:05:32.622835: step: 164/77, loss: 2.4465957721986342e-06 2023-01-22 11:05:34.036200: step: 168/77, loss: 7.546792403445579e-06 2023-01-22 11:05:35.465371: step: 172/77, loss: 0.017119623720645905 2023-01-22 11:05:36.852582: step: 176/77, loss: 0.0001016731548588723 2023-01-22 11:05:38.292821: step: 180/77, loss: 4.181843905826099e-05 2023-01-22 11:05:39.714005: step: 184/77, loss: 0.19686707854270935 2023-01-22 11:05:41.163522: step: 188/77, loss: 0.008008151315152645 2023-01-22 11:05:42.670279: step: 192/77, loss: 1.138431343861157e-06 2023-01-22 11:05:44.071317: step: 196/77, loss: 0.0228792242705822 2023-01-22 11:05:45.522837: step: 200/77, loss: 0.0002090672787744552 2023-01-22 11:05:47.023045: step: 204/77, loss: 3.613301032601157e-06 2023-01-22 11:05:48.461716: step: 208/77, loss: 0.0005431215977296233 2023-01-22 11:05:49.866050: step: 212/77, loss: 1.569067080708919e-06 2023-01-22 11:05:51.331721: step: 216/77, loss: 0.0012952117249369621 2023-01-22 11:05:52.820210: step: 220/77, loss: 2.108344915541238e-06 2023-01-22 11:05:54.310146: step: 224/77, loss: 
0.00017514584760647267 2023-01-22 11:05:55.784817: step: 228/77, loss: 0.0004233909712638706 2023-01-22 11:05:57.197283: step: 232/77, loss: 4.133952097618021e-05 2023-01-22 11:05:58.684175: step: 236/77, loss: 8.803592209005728e-06 2023-01-22 11:06:00.043401: step: 240/77, loss: 1.7240215584024554e-06 2023-01-22 11:06:01.453989: step: 244/77, loss: 0.0004245509917382151 2023-01-22 11:06:02.831904: step: 248/77, loss: 1.5050139268169005e-07 2023-01-22 11:06:04.346832: step: 252/77, loss: 0.022179974243044853 2023-01-22 11:06:05.774743: step: 256/77, loss: 0.0001255650568054989 2023-01-22 11:06:07.250977: step: 260/77, loss: 4.008350060757948e-07 2023-01-22 11:06:08.713323: step: 264/77, loss: 0.0027482016012072563 2023-01-22 11:06:10.154447: step: 268/77, loss: 0.00022981772781349719 2023-01-22 11:06:11.550035: step: 272/77, loss: 7.01972112437943e-06 2023-01-22 11:06:13.018601: step: 276/77, loss: 3.4765056625474244e-05 2023-01-22 11:06:14.452186: step: 280/77, loss: 0.004884883761405945 2023-01-22 11:06:15.849424: step: 284/77, loss: 9.70681958278874e-06 2023-01-22 11:06:17.293418: step: 288/77, loss: 0.011655522510409355 2023-01-22 11:06:18.815071: step: 292/77, loss: 6.033683803252643e-06 2023-01-22 11:06:20.306936: step: 296/77, loss: 0.00023782583593856543 2023-01-22 11:06:21.777623: step: 300/77, loss: 0.004693306982517242 2023-01-22 11:06:23.187804: step: 304/77, loss: 0.00043961359187960625 2023-01-22 11:06:24.626660: step: 308/77, loss: 0.009037474170327187 2023-01-22 11:06:26.049085: step: 312/77, loss: 9.109323582379147e-05 2023-01-22 11:06:27.476496: step: 316/77, loss: 5.065477125754114e-06 2023-01-22 11:06:28.921659: step: 320/77, loss: 2.5830046070041135e-05 2023-01-22 11:06:30.367369: step: 324/77, loss: 4.08288713060756e-07 2023-01-22 11:06:31.849732: step: 328/77, loss: 5.0793074478860945e-05 2023-01-22 11:06:33.253118: step: 332/77, loss: 2.865168426069431e-05 2023-01-22 11:06:34.659743: step: 336/77, loss: 2.9721433747909032e-05 2023-01-22 11:06:36.083819: step: 340/77, loss: 0.0006815999513491988 2023-01-22 11:06:37.583013: step: 344/77, loss: 5.092594801681116e-05 2023-01-22 11:06:39.088519: step: 348/77, loss: 1.8298146642337088e-06 2023-01-22 11:06:40.646830: step: 352/77, loss: 1.0752620255516376e-05 2023-01-22 11:06:42.150816: step: 356/77, loss: 3.539942917996086e-05 2023-01-22 11:06:43.638608: step: 360/77, loss: 0.0002795817272271961 2023-01-22 11:06:45.074851: step: 364/77, loss: 0.00022478778555523604 2023-01-22 11:06:46.548104: step: 368/77, loss: 0.0007004109211266041 2023-01-22 11:06:48.010584: step: 372/77, loss: 0.00017398054478690028 2023-01-22 11:06:49.495967: step: 376/77, loss: 1.4692169543195632e-06 2023-01-22 11:06:50.967692: step: 380/77, loss: 4.7235914735210827e-07 2023-01-22 11:06:52.400980: step: 384/77, loss: 0.001804694184102118 2023-01-22 11:06:53.874381: step: 388/77, loss: 0.00010899404878728092 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.46511627906976744, 'r': 0.03780718336483932, 'f1': 0.06993006993006994}, 'combined': 0.050587710162178244, 'epoch': 27} Test Chinese: {'template': {'p': 0.92, 'r': 0.5390625, 'f1': 0.6798029556650247}, 'slot': {'p': 0.575, 'r': 0.022571148184494603, 'f1': 0.04343720491029273}, 'combined': 0.02952874028384432, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.46511627906976744, 'r': 
0.03780718336483932, 'f1': 0.06993006993006994}, 'combined': 0.050587710162178244, 'epoch': 27} Test Korean: {'template': {'p': 0.92, 'r': 0.5390625, 'f1': 0.6798029556650247}, 'slot': {'p': 0.575, 'r': 0.022571148184494603, 'f1': 0.04343720491029273}, 'combined': 0.02952874028384432, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.46511627906976744, 'r': 0.03780718336483932, 'f1': 0.06993006993006994}, 'combined': 0.050587710162178244, 'epoch': 27} Test Russian: {'template': {'p': 0.92, 'r': 0.5390625, 'f1': 0.6798029556650247}, 'slot': {'p': 0.575, 'r': 0.022571148184494603, 'f1': 0.04343720491029273}, 'combined': 0.02952874028384432, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 28 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 11:08:38.379729: step: 4/77, loss: 
0.013731489889323711 2023-01-22 11:08:39.826378: step: 8/77, loss: 0.0010407003574073315 2023-01-22 11:08:41.286039: step: 12/77, loss: 0.0005618635332211852 2023-01-22 11:08:42.775666: step: 16/77, loss: 0.0029245370533317327 2023-01-22 11:08:44.181138: step: 20/77, loss: 2.4019689135457156e-06 2023-01-22 11:08:45.641747: step: 24/77, loss: 5.255473297438584e-05 2023-01-22 11:08:47.064339: step: 28/77, loss: 0.007973378524184227 2023-01-22 11:08:48.542892: step: 32/77, loss: 0.00712230708450079 2023-01-22 11:08:50.010985: step: 36/77, loss: 1.300935309700435e-05 2023-01-22 11:08:51.497208: step: 40/77, loss: 3.725287456290971e-08 2023-01-22 11:08:52.965126: step: 44/77, loss: 0.0003985797520726919 2023-01-22 11:08:54.438244: step: 48/77, loss: 3.1143270007305546e-07 2023-01-22 11:08:55.925012: step: 52/77, loss: 4.842793828174763e-07 2023-01-22 11:08:57.340885: step: 56/77, loss: 2.2053367501939647e-05 2023-01-22 11:08:58.840709: step: 60/77, loss: 0.00310569372959435 2023-01-22 11:09:00.256886: step: 64/77, loss: 0.0004489465500228107 2023-01-22 11:09:01.712418: step: 68/77, loss: 4.373181582195684e-06 2023-01-22 11:09:03.146364: step: 72/77, loss: 0.047484856098890305 2023-01-22 11:09:04.554605: step: 76/77, loss: 0.001078817993402481 2023-01-22 11:09:05.967633: step: 80/77, loss: 0.0004845151852350682 2023-01-22 11:09:07.429826: step: 84/77, loss: 5.875883289263584e-06 2023-01-22 11:09:08.899987: step: 88/77, loss: 4.23568781116046e-06 2023-01-22 11:09:10.339410: step: 92/77, loss: 0.003169774077832699 2023-01-22 11:09:11.787933: step: 96/77, loss: 0.001381977228447795 2023-01-22 11:09:13.204010: step: 100/77, loss: 0.00012675896869041026 2023-01-22 11:09:14.635382: step: 104/77, loss: 0.00028056855080649257 2023-01-22 11:09:16.095471: step: 108/77, loss: 2.276324448757805e-05 2023-01-22 11:09:17.604985: step: 112/77, loss: 0.0013658979441970587 2023-01-22 11:09:19.026189: step: 116/77, loss: 0.03549131751060486 2023-01-22 11:09:20.507597: step: 120/77, loss: 3.8398309698095545e-05 2023-01-22 11:09:22.011214: step: 124/77, loss: 1.266596001414655e-07 2023-01-22 11:09:23.430145: step: 128/77, loss: 0.06559224426746368 2023-01-22 11:09:24.854932: step: 132/77, loss: 7.129681307560531e-06 2023-01-22 11:09:26.269300: step: 136/77, loss: 5.930523343522509e-07 2023-01-22 11:09:27.705084: step: 140/77, loss: 0.017023207619786263 2023-01-22 11:09:29.151134: step: 144/77, loss: 0.0009497597930021584 2023-01-22 11:09:30.586972: step: 148/77, loss: 0.0005004429258406162 2023-01-22 11:09:32.018228: step: 152/77, loss: 0.0023720364551991224 2023-01-22 11:09:33.478680: step: 156/77, loss: 3.3378313446519314e-07 2023-01-22 11:09:34.892692: step: 160/77, loss: 3.427231547448173e-07 2023-01-22 11:09:36.321588: step: 164/77, loss: 0.004157646559178829 2023-01-22 11:09:37.730284: step: 168/77, loss: 0.002858922118321061 2023-01-22 11:09:39.141648: step: 172/77, loss: 9.284650332119782e-06 2023-01-22 11:09:40.541752: step: 176/77, loss: 7.88212400948396e-06 2023-01-22 11:09:42.035621: step: 180/77, loss: 0.0003945681673940271 2023-01-22 11:09:43.528098: step: 184/77, loss: 3.2046635169535875e-05 2023-01-22 11:09:44.966614: step: 188/77, loss: 0.0011122300056740642 2023-01-22 11:09:46.463289: step: 192/77, loss: 1.002837279884261e-06 2023-01-22 11:09:47.929356: step: 196/77, loss: 0.06045089662075043 2023-01-22 11:09:49.447552: step: 200/77, loss: 1.2665967119573907e-07 2023-01-22 11:09:50.881184: step: 204/77, loss: 0.0015796440420672297 2023-01-22 11:09:52.371147: step: 208/77, loss: 
1.553771289763972e-05 2023-01-22 11:09:53.820139: step: 212/77, loss: 1.7567413124197628e-06 2023-01-22 11:09:55.304049: step: 216/77, loss: 2.616386609588517e-06 2023-01-22 11:09:56.704055: step: 220/77, loss: 0.0005074563669040799 2023-01-22 11:09:58.205216: step: 224/77, loss: 1.072302802640479e-05 2023-01-22 11:09:59.611319: step: 228/77, loss: 0.0009682394447736442 2023-01-22 11:10:01.088719: step: 232/77, loss: 3.7219822843326256e-05 2023-01-22 11:10:02.521035: step: 236/77, loss: 4.753395899115276e-07 2023-01-22 11:10:04.039857: step: 240/77, loss: 5.6268167099915445e-05 2023-01-22 11:10:05.467648: step: 244/77, loss: 1.765011802490335e-05 2023-01-22 11:10:06.886852: step: 248/77, loss: 6.603108340641484e-05 2023-01-22 11:10:08.349966: step: 252/77, loss: 4.001375418738462e-05 2023-01-22 11:10:09.753609: step: 256/77, loss: 1.2814932404126012e-07 2023-01-22 11:10:11.192685: step: 260/77, loss: 3.8165017031133175e-05 2023-01-22 11:10:12.598326: step: 264/77, loss: 2.2879685275256634e-05 2023-01-22 11:10:14.118861: step: 268/77, loss: 0.0009290733723901212 2023-01-22 11:10:15.574581: step: 272/77, loss: 2.5331964081942715e-08 2023-01-22 11:10:17.027879: step: 276/77, loss: 0.0009572876733727753 2023-01-22 11:10:18.448387: step: 280/77, loss: 2.2842411908641225e-06 2023-01-22 11:10:19.843107: step: 284/77, loss: 0.0002019900275627151 2023-01-22 11:10:21.266088: step: 288/77, loss: 0.004656798206269741 2023-01-22 11:10:22.701380: step: 292/77, loss: 7.271652293638908e-07 2023-01-22 11:10:24.092342: step: 296/77, loss: 5.6624305244667994e-08 2023-01-22 11:10:25.511974: step: 300/77, loss: 0.0018320352537557483 2023-01-22 11:10:26.994702: step: 304/77, loss: 0.0008556064567528665 2023-01-22 11:10:28.454323: step: 308/77, loss: 2.3556367523269728e-05 2023-01-22 11:10:29.892931: step: 312/77, loss: 0.14063824713230133 2023-01-22 11:10:31.306514: step: 316/77, loss: 5.176966806175187e-05 2023-01-22 11:10:32.758967: step: 320/77, loss: 1.275466911465628e-05 2023-01-22 11:10:34.209030: step: 324/77, loss: 6.16900535987952e-07 2023-01-22 11:10:35.592697: step: 328/77, loss: 0.00014278925664257258 2023-01-22 11:10:36.945536: step: 332/77, loss: 7.105967142706504e-06 2023-01-22 11:10:38.384115: step: 336/77, loss: 0.001324579818174243 2023-01-22 11:10:39.880719: step: 340/77, loss: 4.3302843550918624e-05 2023-01-22 11:10:41.287999: step: 344/77, loss: 7.405750466205063e-07 2023-01-22 11:10:42.796401: step: 348/77, loss: 0.00011049366003135219 2023-01-22 11:10:44.175582: step: 352/77, loss: 7.489207200706005e-05 2023-01-22 11:10:45.652275: step: 356/77, loss: 2.342394509469159e-06 2023-01-22 11:10:47.103653: step: 360/77, loss: 2.3192773369373754e-05 2023-01-22 11:10:48.581997: step: 364/77, loss: 0.0006508184014819562 2023-01-22 11:10:50.042656: step: 368/77, loss: 0.0004297401465009898 2023-01-22 11:10:51.507290: step: 372/77, loss: 0.00012195734598208219 2023-01-22 11:10:52.986684: step: 376/77, loss: 2.36168352785171e-06 2023-01-22 11:10:54.436962: step: 380/77, loss: 2.2057285605114885e-05 2023-01-22 11:10:55.833341: step: 384/77, loss: 1.3709006907447474e-07 2023-01-22 11:10:57.241677: step: 388/77, loss: 0.00016052668797783554 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.050765210899589394, 'epoch': 28} Test Chinese: {'template': {'p': 0.9358974358974359, 'r': 
0.5703125, 'f1': 0.7087378640776699}, 'slot': {'p': 0.6216216216216216, 'r': 0.022571148184494603, 'f1': 0.043560606060606064}, 'combined': 0.030873050897322746, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.050765210899589394, 'epoch': 28} Test Korean: {'template': {'p': 0.9125, 'r': 0.5703125, 'f1': 0.7019230769230769}, 'slot': {'p': 0.6216216216216216, 'r': 0.022571148184494603, 'f1': 0.043560606060606064}, 'combined': 0.03057619463869464, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.050765210899589394, 'epoch': 28} Test Russian: {'template': {'p': 0.9240506329113924, 'r': 0.5703125, 'f1': 0.7053140096618359}, 'slot': {'p': 0.6216216216216216, 'r': 0.022571148184494603, 'f1': 0.043560606060606064}, 'combined': 0.030723905723905733, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 28} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 
0.04301075268817204, 'epoch': 2} ****************************** Epoch: 29 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 11:12:41.308186: step: 4/77, loss: 1.817933679149064e-07 2023-01-22 11:12:42.754613: step: 8/77, loss: 0.040769945830106735 2023-01-22 11:12:44.169339: step: 12/77, loss: 1.5795183117006673e-07 2023-01-22 11:12:45.587333: step: 16/77, loss: 1.4990278032200877e-06 2023-01-22 11:12:47.025121: step: 20/77, loss: 0.001043134368956089 2023-01-22 11:12:48.493928: step: 24/77, loss: 0.00025510074920021 2023-01-22 11:12:49.944161: step: 28/77, loss: 0.00031373612000606954 2023-01-22 11:12:51.437812: step: 32/77, loss: 1.9087267446593614e-06 2023-01-22 11:12:52.843851: step: 36/77, loss: 0.09458360075950623 2023-01-22 11:12:54.304037: step: 40/77, loss: 3.2746251235948876e-05 2023-01-22 11:12:55.769370: step: 44/77, loss: 0.00016612530453130603 2023-01-22 11:12:57.151210: step: 48/77, loss: 2.814584604493575e-06 2023-01-22 11:12:58.572545: step: 52/77, loss: 0.001565206446684897 2023-01-22 11:12:59.898002: step: 56/77, loss: 5.4112715588416904e-05 2023-01-22 11:13:01.344325: step: 60/77, loss: 0.0010413212003186345 2023-01-22 11:13:02.793767: step: 64/77, loss: 3.0024616535229143e-06 2023-01-22 11:13:04.244400: step: 68/77, loss: 0.02369655668735504 2023-01-22 11:13:05.640716: step: 72/77, loss: 0.008374444209039211 2023-01-22 11:13:07.047656: step: 76/77, loss: 6.269232471822761e-06 2023-01-22 11:13:08.508184: step: 80/77, loss: 3.87479412893299e-05 2023-01-22 11:13:09.981521: step: 84/77, loss: 2.682207878024201e-08 2023-01-22 11:13:11.491722: step: 88/77, loss: 1.1026813950820724e-07 2023-01-22 11:13:12.888700: step: 92/77, loss: 3.655839100247249e-05 2023-01-22 11:13:14.321589: step: 96/77, loss: 1.411835546605289e-05 2023-01-22 11:13:15.758065: step: 100/77, loss: 1.626298217161093e-05 2023-01-22 11:13:17.235225: step: 104/77, loss: 0.001862027682363987 2023-01-22 11:13:18.660963: step: 108/77, loss: 2.682184572222468e-07 2023-01-22 11:13:20.120583: step: 112/77, loss: 6.273245389820659e-07 2023-01-22 11:13:21.532246: step: 116/77, loss: 0.020532015711069107 2023-01-22 11:13:22.968801: step: 120/77, loss: 2.0443840185180306e-06 2023-01-22 11:13:24.446160: step: 124/77, loss: 4.589517175190849e-07 2023-01-22 11:13:25.904353: step: 128/77, loss: 2.2947628508518392e-07 2023-01-22 11:13:27.320231: step: 132/77, loss: 0.0004681869759224355 2023-01-22 11:13:28.721034: step: 136/77, loss: 2.6970980115947896e-07 2023-01-22 11:13:30.216719: step: 140/77, loss: 2.552435717007029e-06 2023-01-22 11:13:31.670059: step: 144/77, loss: 1.7097081581596285e-05 2023-01-22 11:13:33.122077: step: 148/77, loss: 1.3411013810582517e-07 2023-01-22 11:13:34.581033: step: 152/77, loss: 1.5874025848461315e-05 2023-01-22 11:13:36.109323: step: 156/77, loss: 2.0995241811760934e-06 2023-01-22 11:13:37.536673: step: 160/77, loss: 1.5233533304126468e-05 2023-01-22 11:13:39.032598: step: 164/77, loss: 0.002553701400756836 2023-01-22 11:13:40.483554: step: 168/77, loss: 5.59375221200753e-05 2023-01-22 11:13:41.904253: step: 172/77, loss: 9.596681593393441e-06 2023-01-22 11:13:43.308706: step: 176/77, loss: 0.022904738783836365 2023-01-22 11:13:44.761812: step: 180/77, loss: 1.7314723663730547e-06 2023-01-22 11:13:46.221853: step: 184/77, loss: 4.137518772040494e-06 2023-01-22 11:13:47.701808: step: 188/77, loss: 
6.556387575074041e-07 2023-01-22 11:13:49.129911: step: 192/77, loss: 8.6360860223067e-06 2023-01-22 11:13:50.563424: step: 196/77, loss: 0.001840737764723599 2023-01-22 11:13:52.034633: step: 200/77, loss: 2.980230462412692e-08 2023-01-22 11:13:53.486533: step: 204/77, loss: 1.0654099241946824e-05 2023-01-22 11:13:54.977713: step: 208/77, loss: 1.0002829185395967e-05 2023-01-22 11:13:56.394055: step: 212/77, loss: 0.00041803918429650366 2023-01-22 11:13:57.859256: step: 216/77, loss: 0.00032258350984193385 2023-01-22 11:13:59.314986: step: 220/77, loss: 7.789167284499854e-06 2023-01-22 11:14:00.747295: step: 224/77, loss: 4.469942268769955e-06 2023-01-22 11:14:02.168635: step: 228/77, loss: 0.00010209907486569136 2023-01-22 11:14:03.642982: step: 232/77, loss: 1.5556329344690312e-06 2023-01-22 11:14:05.132994: step: 236/77, loss: 1.7775905689632054e-06 2023-01-22 11:14:06.553923: step: 240/77, loss: 0.001098861452192068 2023-01-22 11:14:08.015081: step: 244/77, loss: 0.00037572160363197327 2023-01-22 11:14:09.469699: step: 248/77, loss: 1.2218926315199496e-07 2023-01-22 11:14:10.929623: step: 252/77, loss: 0.004170549102127552 2023-01-22 11:14:12.437007: step: 256/77, loss: 1.2397180171319633e-06 2023-01-22 11:14:13.868562: step: 260/77, loss: 0.00014179742720443755 2023-01-22 11:14:15.239529: step: 264/77, loss: 0.0004720209108199924 2023-01-22 11:14:16.682633: step: 268/77, loss: 0.0011638924479484558 2023-01-22 11:14:18.145552: step: 272/77, loss: 0.0005042596021667123 2023-01-22 11:14:19.616969: step: 276/77, loss: 4.947142997480114e-07 2023-01-22 11:14:21.078561: step: 280/77, loss: 0.0010161111131310463 2023-01-22 11:14:22.583604: step: 284/77, loss: 0.0016872090054675937 2023-01-22 11:14:24.080050: step: 288/77, loss: 7.378399914159672e-06 2023-01-22 11:14:25.495834: step: 292/77, loss: 2.7567045890464215e-07 2023-01-22 11:14:26.976512: step: 296/77, loss: 0.005593928974121809 2023-01-22 11:14:28.460209: step: 300/77, loss: 2.3092803530744277e-05 2023-01-22 11:14:29.926477: step: 304/77, loss: 0.0003652371233329177 2023-01-22 11:14:31.436746: step: 308/77, loss: 0.003939785063266754 2023-01-22 11:14:32.865088: step: 312/77, loss: 0.0016041402705013752 2023-01-22 11:14:34.385410: step: 316/77, loss: 0.3785771131515503 2023-01-22 11:14:35.866983: step: 320/77, loss: 5.214817520027282e-06 2023-01-22 11:14:37.277487: step: 324/77, loss: 5.930572797296918e-07 2023-01-22 11:14:38.728719: step: 328/77, loss: 8.526707460987382e-06 2023-01-22 11:14:40.189047: step: 332/77, loss: 2.276741042805952e-06 2023-01-22 11:14:41.680632: step: 336/77, loss: 1.1235166539336205e-06 2023-01-22 11:14:43.180267: step: 340/77, loss: 1.3290504284668714e-05 2023-01-22 11:14:44.646213: step: 344/77, loss: 0.1279335618019104 2023-01-22 11:14:46.051819: step: 348/77, loss: 1.597791924723424e-05 2023-01-22 11:14:47.478068: step: 352/77, loss: 0.00090602453565225 2023-01-22 11:14:48.917247: step: 356/77, loss: 3.4596016575960675e-06 2023-01-22 11:14:50.418564: step: 360/77, loss: 3.472837488516234e-05 2023-01-22 11:14:51.880484: step: 364/77, loss: 1.6152355328813428e-06 2023-01-22 11:14:53.364212: step: 368/77, loss: 9.981415132642724e-06 2023-01-22 11:14:54.845204: step: 372/77, loss: 9.092828258872032e-05 2023-01-22 11:14:56.295627: step: 376/77, loss: 7.450569228240056e-08 2023-01-22 11:14:57.696652: step: 380/77, loss: 1.962437181646237e-06 2023-01-22 11:14:59.215434: step: 384/77, loss: 1.1756699223042233e-06 2023-01-22 11:15:00.675465: step: 388/77, loss: 6.934347038622946e-05 
==================================================
Loss: 0.008
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05161766061388146, 'epoch': 29}
Test Chinese: {'template': {'p': 0.9102564102564102, 'r': 0.5546875, 'f1': 0.6893203883495146}, 'slot': {'p': 0.46511627906976744, 'r': 0.019627085377821395, 'f1': 0.037664783427495296}, 'combined': 0.025963103139341418, 'epoch': 29}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05161766061388146, 'epoch': 29}
Test Korean: {'template': {'p': 0.9078947368421053, 'r': 0.5390625, 'f1': 0.6764705882352942}, 'slot': {'p': 0.5, 'r': 0.019627085377821395, 'f1': 0.03777148253068933}, 'combined': 0.025551297006054546, 'epoch': 29}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05161766061388146, 'epoch': 29}
Test Russian: {'template': {'p': 0.9102564102564102, 'r': 0.5546875, 'f1': 0.6893203883495146}, 'slot': {'p': 0.47619047619047616, 'r': 0.019627085377821395, 'f1': 0.037700282752120645}, 'combined': 0.02598757354757831, 'epoch': 29}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 29}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 29}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Chinese: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Korean: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Russian: {'template': {'p': 0.9516129032258065, 'r': 0.4609375, 'f1': 0.6210526315789474}, 'slot': {'p': 0.6451612903225806, 'r': 0.019627085377821395, 'f1': 0.03809523809523809}, 'combined': 0.023659147869674185, 'epoch': 2}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2}
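Note: the 'combined' figures in these result dicts are consistent with the product of the template-level and slot-level F1 scores, where each F1 is the usual harmonic mean of precision and recall. A minimal sketch of that check, using the Dev Chinese numbers for epoch 29 above (assuming this is how the scorer combines them):

```python
def f1(p, r):
    """Harmonic mean of precision and recall (0 when both are 0)."""
    return 0.0 if p + r == 0 else 2 * p * r / (p + r)

# Precision/recall copied from the "Dev Chinese" entry for epoch 29 above.
template = {'p': 1.0, 'r': 0.5833333333333334}
slot = {'p': 0.47619047619047616, 'r': 0.03780718336483932}

template_f1 = f1(**template)      # ~0.7368, matches the logged template f1
slot_f1 = f1(**slot)              # ~0.0701, matches the logged slot f1
combined = template_f1 * slot_f1  # ~0.0516, matches the logged 'combined'
print(template_f1, slot_f1, combined)
```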