Command that produces this log: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
----------------------------------------------------------------------------------------------------
> trainable params:
>>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024])
>>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024])
>>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024])
>>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024])
>>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0 through xlmr.encoder.layer.23 (all 24 layers have identical parameter shapes):
        .attention.self.query.weight: torch.Size([1024, 1024]), .attention.self.query.bias: torch.Size([1024])
        .attention.self.key.weight: torch.Size([1024, 1024]), .attention.self.key.bias: torch.Size([1024])
        .attention.self.value.weight: torch.Size([1024, 1024]), .attention.self.value.bias: torch.Size([1024])
        .attention.output.dense.weight: torch.Size([1024, 1024]), .attention.output.dense.bias: torch.Size([1024])
        .attention.output.LayerNorm.weight: torch.Size([1024]), .attention.output.LayerNorm.bias: torch.Size([1024])
        .intermediate.dense.weight: torch.Size([4096, 1024]), .intermediate.dense.bias: torch.Size([4096])
        .output.dense.weight: torch.Size([1024, 4096]), .output.dense.bias: torch.Size([1024])
        .output.LayerNorm.weight: torch.Size([1024]), .output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.pooler.dense.weight: torch.Size([1024, 1024])
>>> xlmr.pooler.dense.bias: torch.Size([1024])
>>> trans_rep.weight: torch.Size([1024, 2048])
>>> trans_rep.bias: torch.Size([1024])
>>> per-template heads, instantiated once for each of the eight templates Corruplate, Cybercrimeplate, Disasterplate, Displacementplate, Epidemiplate, Etiplate, Protestplate, Terrorplate (identical shapes across templates):
        hidden_ffns.<template>.layers.0.weight: torch.Size([768, 1024]), .layers.0.bias: torch.Size([768])
        template_classifiers.<template>.layers.0.weight: torch.Size([450, 768]), .layers.0.bias: torch.Size([450]), .layers.1.weight: torch.Size([2, 450]), .layers.1.bias: torch.Size([2])
        type_classifiers.<template>.layers.0.weight: torch.Size([450, 768]), .layers.0.bias: torch.Size([450]), .layers.1.weight: torch.Size([6, 450]), .layers.1.bias: torch.Size([6])
        completion_classifiers.<template>.layers.0.weight: torch.Size([450, 768]), .layers.0.bias: torch.Size([450]), .layers.1.weight: torch.Size([4, 450]), .layers.1.bias: torch.Size([4])
        overtime_classifiers.<template>.layers.0.weight: torch.Size([450, 768]), .layers.0.bias: torch.Size([450]), .layers.1.weight: torch.Size([2, 450]), .layers.1.bias: torch.Size([2])
        coordinated_classifiers.<template>.layers.0.weight: torch.Size([450, 768]), .layers.0.bias: torch.Size([450]), .layers.1.weight: torch.Size([2, 450]), .layers.1.bias: torch.Size([2])
n_trainable_params: 582185936, n_nontrainable_params: 0
----------------------------------------------------------------------------------------------------
******************************
Epoch: 0
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10
--xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:39:42.310691: step: 4/77, loss: 1.040492296218872 2023-01-22 07:39:43.571461: step: 8/77, loss: 1.0503621101379395 2023-01-22 07:39:44.853573: step: 12/77, loss: 1.05849289894104 2023-01-22 07:39:46.180489: step: 16/77, loss: 1.0572280883789062 2023-01-22 07:39:47.497874: step: 20/77, loss: 1.0390177965164185 2023-01-22 07:39:48.792250: step: 24/77, loss: 1.0531857013702393 2023-01-22 07:39:50.067482: step: 28/77, loss: 1.057201862335205 2023-01-22 07:39:51.377649: step: 32/77, loss: 1.0431196689605713 2023-01-22 07:39:52.693378: step: 36/77, loss: 1.0291414260864258 2023-01-22 07:39:53.966838: step: 40/77, loss: 1.0287554264068604 2023-01-22 07:39:55.250117: step: 44/77, loss: 1.0200912952423096 2023-01-22 07:39:56.504290: step: 48/77, loss: 1.006379246711731 2023-01-22 07:39:57.806578: step: 52/77, loss: 1.0050897598266602 2023-01-22 07:39:59.107723: step: 56/77, loss: 0.9895217418670654 2023-01-22 07:40:00.422500: step: 60/77, loss: 0.9909875392913818 2023-01-22 07:40:01.747476: step: 64/77, loss: 0.9767402410507202 2023-01-22 07:40:03.009335: step: 68/77, loss: 0.9686396718025208 2023-01-22 07:40:04.350219: step: 72/77, loss: 0.9502049684524536 2023-01-22 07:40:05.716146: step: 76/77, loss: 0.948758065700531 2023-01-22 07:40:06.999700: step: 80/77, loss: 0.9231487512588501 2023-01-22 07:40:08.349655: step: 84/77, loss: 0.9294416904449463 2023-01-22 07:40:09.643695: step: 88/77, loss: 0.902397632598877 2023-01-22 07:40:10.964632: step: 92/77, loss: 0.8896970152854919 2023-01-22 07:40:12.251263: step: 96/77, loss: 0.8582167029380798 2023-01-22 07:40:13.534204: step: 100/77, loss: 0.8548195958137512 2023-01-22 07:40:14.807257: step: 104/77, loss: 0.8246533274650574 2023-01-22 07:40:16.134305: step: 108/77, loss: 0.8212116956710815 2023-01-22 07:40:17.438437: step: 112/77, loss: 0.8350183963775635 2023-01-22 07:40:18.712444: step: 116/77, loss: 0.7815088033676147 2023-01-22 07:40:20.013621: step: 120/77, loss: 0.7805500030517578 2023-01-22 07:40:21.345437: step: 124/77, loss: 0.7633838653564453 2023-01-22 07:40:22.587218: step: 128/77, loss: 0.7470266819000244 2023-01-22 07:40:23.936181: step: 132/77, loss: 0.7090791463851929 2023-01-22 07:40:25.248568: step: 136/77, loss: 0.6641875505447388 2023-01-22 07:40:26.502830: step: 140/77, loss: 0.7156832218170166 2023-01-22 07:40:27.791235: step: 144/77, loss: 0.6273938417434692 2023-01-22 07:40:29.100892: step: 148/77, loss: 0.6106159687042236 2023-01-22 07:40:30.435416: step: 152/77, loss: 0.6362186670303345 2023-01-22 07:40:31.765928: step: 156/77, loss: 0.5732383728027344 2023-01-22 07:40:33.041503: step: 160/77, loss: 0.5817826986312866 2023-01-22 07:40:34.400335: step: 164/77, loss: 0.5319560170173645 2023-01-22 07:40:35.698613: step: 168/77, loss: 0.5354362726211548 2023-01-22 07:40:36.997481: step: 172/77, loss: 0.462534099817276 2023-01-22 07:40:38.275873: step: 176/77, loss: 0.47266411781311035 2023-01-22 07:40:39.575972: step: 180/77, loss: 0.480622798204422 2023-01-22 07:40:40.906012: step: 184/77, loss: 0.3768722116947174 2023-01-22 07:40:42.188927: step: 188/77, loss: 0.35718005895614624 2023-01-22 07:40:43.488329: step: 192/77, loss: 0.3514782190322876 2023-01-22 07:40:44.782928: step: 196/77, loss: 0.316677451133728 2023-01-22 07:40:46.088920: step: 200/77, loss: 0.2975374460220337 2023-01-22 07:40:47.410519: step: 204/77, loss: 0.3182145059108734 2023-01-22 07:40:48.736448: step: 208/77, 
loss: 0.21221506595611572 2023-01-22 07:40:50.036489: step: 212/77, loss: 0.21120867133140564 2023-01-22 07:40:51.350747: step: 216/77, loss: 0.1704612374305725 2023-01-22 07:40:52.655147: step: 220/77, loss: 0.3029862642288208 2023-01-22 07:40:53.962140: step: 224/77, loss: 0.22104257345199585 2023-01-22 07:40:55.260168: step: 228/77, loss: 0.24535319209098816 2023-01-22 07:40:56.573538: step: 232/77, loss: 0.22974175214767456 2023-01-22 07:40:57.843623: step: 236/77, loss: 0.2581561803817749 2023-01-22 07:40:59.161189: step: 240/77, loss: 0.12163802981376648 2023-01-22 07:41:00.464468: step: 244/77, loss: 0.2190374732017517 2023-01-22 07:41:01.752295: step: 248/77, loss: 0.108123280107975 2023-01-22 07:41:03.096416: step: 252/77, loss: 0.20003128051757812 2023-01-22 07:41:04.389246: step: 256/77, loss: 0.22386214137077332 2023-01-22 07:41:05.653537: step: 260/77, loss: 0.206337109208107 2023-01-22 07:41:06.940566: step: 264/77, loss: 0.07493802160024643 2023-01-22 07:41:08.215206: step: 268/77, loss: 0.08836229145526886 2023-01-22 07:41:09.540549: step: 272/77, loss: 0.1717900037765503 2023-01-22 07:41:10.860886: step: 276/77, loss: 0.12574687600135803 2023-01-22 07:41:12.167652: step: 280/77, loss: 0.18826636672019958 2023-01-22 07:41:13.458550: step: 284/77, loss: 0.05894722789525986 2023-01-22 07:41:14.756009: step: 288/77, loss: 0.15189287066459656 2023-01-22 07:41:16.044585: step: 292/77, loss: 0.05881006643176079 2023-01-22 07:41:17.317370: step: 296/77, loss: 0.13599269092082977 2023-01-22 07:41:18.594132: step: 300/77, loss: 0.06342396885156631 2023-01-22 07:41:19.898772: step: 304/77, loss: 0.1103890985250473 2023-01-22 07:41:21.218051: step: 308/77, loss: 0.14803536236286163 2023-01-22 07:41:22.506476: step: 312/77, loss: 0.03357389196753502 2023-01-22 07:41:23.849974: step: 316/77, loss: 0.05214579403400421 2023-01-22 07:41:25.148671: step: 320/77, loss: 0.289453387260437 2023-01-22 07:41:26.468856: step: 324/77, loss: 0.07445473968982697 2023-01-22 07:41:27.742675: step: 328/77, loss: 0.06806229054927826 2023-01-22 07:41:29.008541: step: 332/77, loss: 0.09790605306625366 2023-01-22 07:41:30.269455: step: 336/77, loss: 0.30805832147598267 2023-01-22 07:41:31.554903: step: 340/77, loss: 0.17872123420238495 2023-01-22 07:41:32.847391: step: 344/77, loss: 0.06522324681282043 2023-01-22 07:41:34.171509: step: 348/77, loss: 0.031129013746976852 2023-01-22 07:41:35.523619: step: 352/77, loss: 0.07004313170909882 2023-01-22 07:41:36.893488: step: 356/77, loss: 0.05774754285812378 2023-01-22 07:41:38.208646: step: 360/77, loss: 0.14162006974220276 2023-01-22 07:41:39.463404: step: 364/77, loss: 0.13135913014411926 2023-01-22 07:41:40.756404: step: 368/77, loss: 0.1805039495229721 2023-01-22 07:41:42.176940: step: 372/77, loss: 0.04826679825782776 2023-01-22 07:41:43.474189: step: 376/77, loss: 0.12572729587554932 2023-01-22 07:41:44.770460: step: 380/77, loss: 0.10801220685243607 2023-01-22 07:41:46.059654: step: 384/77, loss: 0.07356271892786026 2023-01-22 07:41:47.354575: step: 388/77, loss: 0.18298789858818054 ================================================== Loss: 0.479 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test 
Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:43:55.199037: step: 4/77, loss: 0.14832714200019836 2023-01-22 07:43:56.544806: step: 8/77, loss: 0.12168654799461365 2023-01-22 07:43:57.789083: step: 12/77, loss: 0.0857740044593811 2023-01-22 07:43:59.054029: step: 16/77, loss: 0.15103742480278015 2023-01-22 07:44:00.352034: step: 20/77, loss: 0.12808871269226074 2023-01-22 07:44:01.652623: step: 24/77, loss: 0.05073537304997444 2023-01-22 07:44:02.936801: step: 28/77, loss: 0.11223308742046356 2023-01-22 07:44:04.208440: step: 32/77, loss: 0.15117031335830688 2023-01-22 07:44:05.507148: step: 36/77, loss: 0.21810725331306458 2023-01-22 07:44:06.852080: step: 40/77, loss: 0.12143763899803162 2023-01-22 07:44:08.150768: step: 44/77, loss: 0.08966831862926483 2023-01-22 07:44:09.414540: step: 48/77, loss: 0.06273593008518219 2023-01-22 07:44:10.674736: step: 52/77, loss: 0.13776537775993347 2023-01-22 07:44:11.953234: step: 56/77, loss: 0.12719234824180603 2023-01-22 07:44:13.212949: step: 60/77, loss: 0.057064518332481384 2023-01-22 07:44:14.504140: step: 64/77, loss: 0.16016772389411926 2023-01-22 07:44:15.798995: step: 68/77, loss: 
0.11384375393390656 2023-01-22 07:44:17.145029: step: 72/77, loss: 0.08540131151676178 2023-01-22 07:44:18.425504: step: 76/77, loss: 0.12404485046863556 2023-01-22 07:44:19.708277: step: 80/77, loss: 0.3094620704650879 2023-01-22 07:44:21.036463: step: 84/77, loss: 0.1255866438150406 2023-01-22 07:44:22.358321: step: 88/77, loss: 0.1157902330160141 2023-01-22 07:44:23.665056: step: 92/77, loss: 0.22449104487895966 2023-01-22 07:44:24.936640: step: 96/77, loss: 0.036930665373802185 2023-01-22 07:44:26.219565: step: 100/77, loss: 0.0713043063879013 2023-01-22 07:44:27.526628: step: 104/77, loss: 0.12061722576618195 2023-01-22 07:44:28.821082: step: 108/77, loss: 0.051825400441884995 2023-01-22 07:44:30.096290: step: 112/77, loss: 0.037453874945640564 2023-01-22 07:44:31.360250: step: 116/77, loss: 0.16997961699962616 2023-01-22 07:44:32.687264: step: 120/77, loss: 0.055220380425453186 2023-01-22 07:44:33.996863: step: 124/77, loss: 0.1438325196504593 2023-01-22 07:44:35.305699: step: 128/77, loss: 0.10651086270809174 2023-01-22 07:44:36.609401: step: 132/77, loss: 0.09997448325157166 2023-01-22 07:44:37.886075: step: 136/77, loss: 0.12448174506425858 2023-01-22 07:44:39.221547: step: 140/77, loss: 0.1139465942978859 2023-01-22 07:44:40.547882: step: 144/77, loss: 0.10619348287582397 2023-01-22 07:44:41.828686: step: 148/77, loss: 0.11222274601459503 2023-01-22 07:44:43.136807: step: 152/77, loss: 0.10357049852609634 2023-01-22 07:44:44.407742: step: 156/77, loss: 0.11287228763103485 2023-01-22 07:44:45.750100: step: 160/77, loss: 0.15079429745674133 2023-01-22 07:44:47.041648: step: 164/77, loss: 0.11793462932109833 2023-01-22 07:44:48.366842: step: 168/77, loss: 0.05568506568670273 2023-01-22 07:44:49.719593: step: 172/77, loss: 0.1194445788860321 2023-01-22 07:44:51.024129: step: 176/77, loss: 0.03512217849493027 2023-01-22 07:44:52.336218: step: 180/77, loss: 0.14646495878696442 2023-01-22 07:44:53.692742: step: 184/77, loss: 0.07254618406295776 2023-01-22 07:44:55.010063: step: 188/77, loss: 0.038304783403873444 2023-01-22 07:44:56.287256: step: 192/77, loss: 0.0619526244699955 2023-01-22 07:44:57.633667: step: 196/77, loss: 0.16268327832221985 2023-01-22 07:44:58.916058: step: 200/77, loss: 0.1339617669582367 2023-01-22 07:45:00.226505: step: 204/77, loss: 0.06768632680177689 2023-01-22 07:45:01.487736: step: 208/77, loss: 0.06987083703279495 2023-01-22 07:45:02.794564: step: 212/77, loss: 0.061561498790979385 2023-01-22 07:45:04.155045: step: 216/77, loss: 0.04402967542409897 2023-01-22 07:45:05.477698: step: 220/77, loss: 0.09891396760940552 2023-01-22 07:45:06.786378: step: 224/77, loss: 0.17626222968101501 2023-01-22 07:45:08.117972: step: 228/77, loss: 0.028642630204558372 2023-01-22 07:45:09.407992: step: 232/77, loss: 0.10021352767944336 2023-01-22 07:45:10.688740: step: 236/77, loss: 0.0462426021695137 2023-01-22 07:45:12.018257: step: 240/77, loss: 0.09565180540084839 2023-01-22 07:45:13.354409: step: 244/77, loss: 0.10396598279476166 2023-01-22 07:45:14.699279: step: 248/77, loss: 0.11824971437454224 2023-01-22 07:45:15.986559: step: 252/77, loss: 0.11199741065502167 2023-01-22 07:45:17.279844: step: 256/77, loss: 0.08092804253101349 2023-01-22 07:45:18.548130: step: 260/77, loss: 0.061664629727602005 2023-01-22 07:45:19.855667: step: 264/77, loss: 0.07963520288467407 2023-01-22 07:45:21.156250: step: 268/77, loss: 0.1210048645734787 2023-01-22 07:45:22.487847: step: 272/77, loss: 0.12631292641162872 2023-01-22 07:45:23.757622: step: 276/77, loss: 0.07200046628713608 
2023-01-22 07:45:25.058855: step: 280/77, loss: 0.04203527420759201 2023-01-22 07:45:26.352759: step: 284/77, loss: 0.11212755739688873 2023-01-22 07:45:27.656562: step: 288/77, loss: 0.03090221807360649 2023-01-22 07:45:29.003415: step: 292/77, loss: 0.05292743444442749 2023-01-22 07:45:30.289039: step: 296/77, loss: 0.03338020667433739 2023-01-22 07:45:31.563453: step: 300/77, loss: 0.12936441600322723 2023-01-22 07:45:32.841406: step: 304/77, loss: 0.25219258666038513 2023-01-22 07:45:34.187704: step: 308/77, loss: 0.04136262834072113 2023-01-22 07:45:35.519755: step: 312/77, loss: 0.1652255654335022 2023-01-22 07:45:36.824328: step: 316/77, loss: 0.4338652193546295 2023-01-22 07:45:38.149583: step: 320/77, loss: 0.03826376423239708 2023-01-22 07:45:39.454114: step: 324/77, loss: 0.16006599366664886 2023-01-22 07:45:40.790442: step: 328/77, loss: 0.08961412310600281 2023-01-22 07:45:42.138707: step: 332/77, loss: 0.07449474930763245 2023-01-22 07:45:43.444080: step: 336/77, loss: 0.11611238121986389 2023-01-22 07:45:44.743671: step: 340/77, loss: 0.035404592752456665 2023-01-22 07:45:46.025001: step: 344/77, loss: 0.09039437025785446 2023-01-22 07:45:47.303490: step: 348/77, loss: 0.14755980670452118 2023-01-22 07:45:48.623026: step: 352/77, loss: 0.04100664332509041 2023-01-22 07:45:49.968758: step: 356/77, loss: 0.033255741000175476 2023-01-22 07:45:51.338557: step: 360/77, loss: 0.06925242394208908 2023-01-22 07:45:52.695929: step: 364/77, loss: 0.16693246364593506 2023-01-22 07:45:54.036530: step: 368/77, loss: 0.08979891240596771 2023-01-22 07:45:55.379226: step: 372/77, loss: 0.12025587260723114 2023-01-22 07:45:56.696688: step: 376/77, loss: 0.0694204643368721 2023-01-22 07:45:58.015531: step: 380/77, loss: 0.18677574396133423 2023-01-22 07:45:59.313725: step: 384/77, loss: 0.11339643597602844 2023-01-22 07:46:00.632173: step: 388/77, loss: 0.06470693647861481 ================================================== Loss: 0.107 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'template': {'p': 0.0, 'r': 
0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 2 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:47:43.895821: step: 4/77, loss: 0.08788135647773743 2023-01-22 07:47:45.207282: step: 8/77, loss: 0.08346004039049149 2023-01-22 07:47:46.445048: step: 12/77, loss: 0.17841340601444244 2023-01-22 07:47:47.732115: step: 16/77, loss: 0.054879773408174515 2023-01-22 07:47:49.025371: step: 20/77, loss: 0.058798372745513916 2023-01-22 07:47:50.338572: step: 24/77, loss: 0.08873613178730011 2023-01-22 07:47:51.604576: step: 28/77, loss: 0.04994595795869827 2023-01-22 07:47:52.913162: step: 32/77, loss: 0.17656423151493073 2023-01-22 07:47:54.217349: step: 36/77, loss: 0.18922853469848633 2023-01-22 07:47:55.533813: step: 40/77, loss: 0.17291899025440216 2023-01-22 07:47:56.856123: step: 44/77, loss: 0.0713772103190422 2023-01-22 07:47:58.120381: step: 48/77, loss: 0.03998291864991188 2023-01-22 07:47:59.407575: step: 52/77, loss: 0.05664319917559624 2023-01-22 07:48:00.708619: step: 56/77, loss: 0.23095250129699707 2023-01-22 07:48:02.045014: step: 60/77, loss: 0.10259470343589783 2023-01-22 07:48:03.344255: step: 64/77, loss: 0.07569926232099533 2023-01-22 07:48:04.605961: step: 68/77, loss: 0.044563233852386475 2023-01-22 07:48:05.925939: step: 72/77, loss: 0.06827056407928467 2023-01-22 07:48:07.205603: step: 76/77, loss: 0.17188124358654022 2023-01-22 07:48:08.478943: step: 80/77, loss: 0.25089898705482483 2023-01-22 07:48:09.773103: step: 84/77, loss: 0.09476040303707123 2023-01-22 07:48:11.099862: step: 88/77, loss: 0.0689239576458931 2023-01-22 07:48:12.416035: step: 92/77, loss: 0.11733505129814148 2023-01-22 07:48:13.759274: step: 96/77, loss: 0.033800750970840454 2023-01-22 07:48:15.009437: step: 100/77, loss: 0.059796176850795746 2023-01-22 07:48:16.291556: step: 104/77, loss: 0.11064587533473969 2023-01-22 07:48:17.585077: step: 108/77, loss: 0.0926768034696579 2023-01-22 07:48:18.888459: step: 112/77, loss: 0.09673449397087097 2023-01-22 07:48:20.167640: step: 116/77, loss: 0.1307319700717926 2023-01-22 07:48:21.509611: step: 120/77, loss: 0.10335015505552292 2023-01-22 07:48:22.870413: step: 124/77, loss: 0.100394606590271 2023-01-22 07:48:24.208890: step: 128/77, loss: 0.07573147118091583 2023-01-22 07:48:25.507411: step: 132/77, loss: 0.12420970946550369 2023-01-22 07:48:26.814928: step: 136/77, loss: 0.04513493925333023 2023-01-22 07:48:28.065238: step: 140/77, loss: 0.07229103893041611 2023-01-22 
07:48:29.360522: step: 144/77, loss: 0.055163364857435226 2023-01-22 07:48:30.657952: step: 148/77, loss: 0.06094565987586975 2023-01-22 07:48:31.927550: step: 152/77, loss: 0.018917806446552277 2023-01-22 07:48:33.209226: step: 156/77, loss: 0.07961555570363998 2023-01-22 07:48:34.502944: step: 160/77, loss: 0.03949800878763199 2023-01-22 07:48:35.789732: step: 164/77, loss: 0.0688449963927269 2023-01-22 07:48:37.086661: step: 168/77, loss: 0.009676387533545494 2023-01-22 07:48:38.403047: step: 172/77, loss: 0.07024532556533813 2023-01-22 07:48:39.687360: step: 176/77, loss: 0.017646795138716698 2023-01-22 07:48:41.008600: step: 180/77, loss: 0.01941852644085884 2023-01-22 07:48:42.296047: step: 184/77, loss: 0.011306710541248322 2023-01-22 07:48:43.610025: step: 188/77, loss: 0.008137895725667477 2023-01-22 07:48:44.913894: step: 192/77, loss: 0.08202481269836426 2023-01-22 07:48:46.229302: step: 196/77, loss: 0.09872758388519287 2023-01-22 07:48:47.507744: step: 200/77, loss: 0.022330768406391144 2023-01-22 07:48:48.803604: step: 204/77, loss: 0.07424715161323547 2023-01-22 07:48:50.155286: step: 208/77, loss: 0.04477814584970474 2023-01-22 07:48:51.455282: step: 212/77, loss: 0.06019924581050873 2023-01-22 07:48:52.739066: step: 216/77, loss: 0.01825646311044693 2023-01-22 07:48:54.066318: step: 220/77, loss: 0.04087033122777939 2023-01-22 07:48:55.371428: step: 224/77, loss: 0.01391011942178011 2023-01-22 07:48:56.661863: step: 228/77, loss: 0.07943939417600632 2023-01-22 07:48:58.010501: step: 232/77, loss: 0.010478628799319267 2023-01-22 07:48:59.339010: step: 236/77, loss: 0.1004880741238594 2023-01-22 07:49:00.654138: step: 240/77, loss: 0.10186842828989029 2023-01-22 07:49:02.019311: step: 244/77, loss: 0.07950830459594727 2023-01-22 07:49:03.294377: step: 248/77, loss: 0.0045279888436198235 2023-01-22 07:49:04.585571: step: 252/77, loss: 0.0602598637342453 2023-01-22 07:49:05.844603: step: 256/77, loss: 0.36025530099868774 2023-01-22 07:49:07.136677: step: 260/77, loss: 0.061081767082214355 2023-01-22 07:49:08.454901: step: 264/77, loss: 0.02526148408651352 2023-01-22 07:49:09.738476: step: 268/77, loss: 0.05052194744348526 2023-01-22 07:49:11.050490: step: 272/77, loss: 0.039078567177057266 2023-01-22 07:49:12.365157: step: 276/77, loss: 0.04826498031616211 2023-01-22 07:49:13.716956: step: 280/77, loss: 0.03132627531886101 2023-01-22 07:49:15.007424: step: 284/77, loss: 0.037446968257427216 2023-01-22 07:49:16.286982: step: 288/77, loss: 0.07468254119157791 2023-01-22 07:49:17.597252: step: 292/77, loss: 0.07227995991706848 2023-01-22 07:49:18.870286: step: 296/77, loss: 0.015393940731883049 2023-01-22 07:49:20.131687: step: 300/77, loss: 0.009335112757980824 2023-01-22 07:49:21.472036: step: 304/77, loss: 0.050310514867305756 2023-01-22 07:49:22.772495: step: 308/77, loss: 0.1641617864370346 2023-01-22 07:49:24.117461: step: 312/77, loss: 0.048241421580314636 2023-01-22 07:49:25.430918: step: 316/77, loss: 0.04193336144089699 2023-01-22 07:49:26.727131: step: 320/77, loss: 0.01817990653216839 2023-01-22 07:49:28.007691: step: 324/77, loss: 0.09641657024621964 2023-01-22 07:49:29.301847: step: 328/77, loss: 0.08564266562461853 2023-01-22 07:49:30.615022: step: 332/77, loss: 0.02044866979122162 2023-01-22 07:49:31.924944: step: 336/77, loss: 0.01461329497396946 2023-01-22 07:49:33.207799: step: 340/77, loss: 0.020636137574911118 2023-01-22 07:49:34.523630: step: 344/77, loss: 0.0273189265280962 2023-01-22 07:49:35.879678: step: 348/77, loss: 0.01171826757490635 2023-01-22 
07:49:37.153677: step: 352/77, loss: 0.030976679176092148 2023-01-22 07:49:38.419897: step: 356/77, loss: 0.04481905698776245 2023-01-22 07:49:39.706810: step: 360/77, loss: 0.016529062762856483 2023-01-22 07:49:40.988062: step: 364/77, loss: 0.02783080004155636 2023-01-22 07:49:42.297844: step: 368/77, loss: 0.3587649166584015 2023-01-22 07:49:43.608896: step: 372/77, loss: 0.03467598557472229 2023-01-22 07:49:44.901813: step: 376/77, loss: 0.06850849837064743 2023-01-22 07:49:46.211097: step: 380/77, loss: 0.01407004427164793 2023-01-22 07:49:47.494661: step: 384/77, loss: 0.0250600166618824 2023-01-22 07:49:48.858812: step: 388/77, loss: 0.07285062223672867 ================================================== Loss: 0.073 -------------------- Dev Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5833333333333334, 'f1': 0.7216494845360825}, 'slot': {'p': 0.45454545454545453, 'r': 0.03780718336483932, 'f1': 0.06980802792321117}, 'combined': 0.05037692736726579, 'epoch': 2} Test Chinese: {'template': {'p': 0.7375, 'r': 0.4609375, 'f1': 0.5673076923076923}, 'slot': {'p': 0.26666666666666666, 'r': 0.01098901098901099, 'f1': 0.021108179419525065}, 'combined': 0.011974832555307489, 'epoch': 2} Dev Korean: {'template': {'p': 0.9210526315789473, 'r': 0.5833333333333334, 'f1': 0.7142857142857143}, 'slot': {'p': 0.43478260869565216, 'r': 0.03780718336483932, 'f1': 0.06956521739130435}, 'combined': 0.04968944099378882, 'epoch': 2} Test Korean: {'template': {'p': 0.7375, 'r': 0.4609375, 'f1': 0.5673076923076923}, 'slot': {'p': 0.26666666666666666, 'r': 0.01098901098901099, 'f1': 0.021108179419525065}, 'combined': 0.011974832555307489, 'epoch': 2} Dev Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5833333333333334, 'f1': 0.7216494845360825}, 'slot': {'p': 0.45454545454545453, 'r': 0.03780718336483932, 'f1': 0.06980802792321117}, 'combined': 0.05037692736726579, 'epoch': 2} Test Russian: {'template': {'p': 0.7375, 'r': 0.4609375, 'f1': 0.5673076923076923}, 'slot': {'p': 0.26666666666666666, 'r': 0.01098901098901099, 'f1': 0.021108179419525065}, 'combined': 0.011974832555307489, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} New best chinese model... New best korean model... New best russian model... 
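Note: every evaluation block in this log has the same shape — for each language it prints Dev, Test, and Sample dicts with 'template' and 'slot' precision/recall/F1 plus a single 'combined' number. The printed values are consistent with f1 = 2pr/(p+r) and combined = template_f1 * slot_f1 (e.g. epoch 2, Dev Chinese: 0.7216... * 0.0698... ≈ 0.050377). The scorer itself is not part of this log, so the sketch below only reproduces that arithmetic; all names in it are illustrative, and the exact criterion behind the per-language "New best ... model..." messages is not recoverable from the log alone.

```python
# Minimal sketch, not the project's scorer: it only reproduces the arithmetic that the
# printed dicts are consistent with (f1 = 2pr/(p+r), combined = template_f1 * slot_f1).
def f1(p, r):
    return 0.0 if p + r == 0 else 2 * p * r / (p + r)

def combined(template, slot):
    return template["f1"] * slot["f1"]

# Epoch 2, Dev Chinese, taken from the numbers printed above:
template = {"p": 0.9459459459459459, "r": 0.5833333333333334}
slot = {"p": 0.45454545454545453, "r": 0.03780718336483932}
template["f1"], slot["f1"] = f1(**template), f1(**slot)   # ~0.72165 and ~0.06981
assert abs(combined(template, slot) - 0.05037692736726579) < 1e-9
```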
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5833333333333334, 'f1': 0.7216494845360825}, 'slot': {'p': 0.45454545454545453, 'r': 0.03780718336483932, 'f1': 0.06980802792321117}, 'combined': 0.05037692736726579, 'epoch': 2} Test for Chinese: {'template': {'p': 0.7375, 'r': 0.4609375, 'f1': 0.5673076923076923}, 'slot': {'p': 0.26666666666666666, 'r': 0.01098901098901099, 'f1': 0.021108179419525065}, 'combined': 0.011974832555307489, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 0.9210526315789473, 'r': 0.5833333333333334, 'f1': 0.7142857142857143}, 'slot': {'p': 0.43478260869565216, 'r': 0.03780718336483932, 'f1': 0.06956521739130435}, 'combined': 0.04968944099378882, 'epoch': 2} Test for Korean: {'template': {'p': 0.7375, 'r': 0.4609375, 'f1': 0.5673076923076923}, 'slot': {'p': 0.26666666666666666, 'r': 0.01098901098901099, 'f1': 0.021108179419525065}, 'combined': 0.011974832555307489, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5833333333333334, 'f1': 0.7216494845360825}, 'slot': {'p': 0.45454545454545453, 'r': 0.03780718336483932, 'f1': 0.06980802792321117}, 'combined': 0.05037692736726579, 'epoch': 2} Test for Russian: {'template': {'p': 0.7375, 'r': 0.4609375, 'f1': 0.5673076923076923}, 'slot': {'p': 0.26666666666666666, 'r': 0.01098901098901099, 'f1': 0.021108179419525065}, 'combined': 0.011974832555307489, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:51:55.061592: step: 4/77, loss: 0.03292298689484596 2023-01-22 07:51:56.395165: step: 8/77, loss: 0.033934395760297775 2023-01-22 07:51:57.734356: step: 12/77, loss: 0.10713164508342743 2023-01-22 07:51:59.053297: step: 16/77, loss: 0.08980081230401993 2023-01-22 07:52:00.335462: step: 20/77, loss: 0.3165131211280823 2023-01-22 07:52:01.674195: step: 24/77, loss: 0.06864053755998611 2023-01-22 07:52:03.056831: step: 28/77, loss: 0.02137758769094944 2023-01-22 07:52:04.320933: step: 32/77, loss: 0.02520819380879402 2023-01-22 07:52:05.626259: step: 36/77, loss: 0.026509536430239677 2023-01-22 07:52:06.962520: step: 40/77, loss: 0.015540415421128273 2023-01-22 07:52:08.286657: step: 44/77, loss: 0.03514610230922699 2023-01-22 07:52:09.591535: step: 48/77, loss: 0.016079070046544075 2023-01-22 07:52:10.874658: step: 52/77, loss: 0.021822217851877213 2023-01-22 07:52:12.187047: step: 56/77, loss: 0.004361532628536224 2023-01-22 07:52:13.502865: step: 60/77, loss: 0.08357305824756622 2023-01-22 07:52:14.859215: step: 64/77, loss: 0.02250819467008114 2023-01-22 07:52:16.221513: step: 68/77, loss: 0.0820513665676117 2023-01-22 07:52:17.504470: step: 72/77, loss: 0.025214381515979767 2023-01-22 07:52:18.854985: step: 76/77, loss: 
0.013879728503525257 2023-01-22 07:52:20.152732: step: 80/77, loss: 0.03811607137322426 2023-01-22 07:52:21.458090: step: 84/77, loss: 0.023846963420510292 2023-01-22 07:52:22.731367: step: 88/77, loss: 0.10572272539138794 2023-01-22 07:52:24.043186: step: 92/77, loss: 0.027351083233952522 2023-01-22 07:52:25.387579: step: 96/77, loss: 0.013311273418366909 2023-01-22 07:52:26.666129: step: 100/77, loss: 0.09269560128450394 2023-01-22 07:52:28.004681: step: 104/77, loss: 0.009473717771470547 2023-01-22 07:52:29.326793: step: 108/77, loss: 0.006472132168710232 2023-01-22 07:52:30.597040: step: 112/77, loss: 0.0017005936242640018 2023-01-22 07:52:31.937780: step: 116/77, loss: 0.05240405350923538 2023-01-22 07:52:33.223879: step: 120/77, loss: 0.05236407369375229 2023-01-22 07:52:34.529361: step: 124/77, loss: 0.010209540836513042 2023-01-22 07:52:35.890103: step: 128/77, loss: 0.139913409948349 2023-01-22 07:52:37.296482: step: 132/77, loss: 0.031847041100263596 2023-01-22 07:52:38.593109: step: 136/77, loss: 0.07764370739459991 2023-01-22 07:52:39.884759: step: 140/77, loss: 0.026239609345793724 2023-01-22 07:52:41.161845: step: 144/77, loss: 0.04461614042520523 2023-01-22 07:52:42.484658: step: 148/77, loss: 0.003413321916013956 2023-01-22 07:52:43.809461: step: 152/77, loss: 0.00506990123540163 2023-01-22 07:52:45.149402: step: 156/77, loss: 0.07757072895765305 2023-01-22 07:52:46.402786: step: 160/77, loss: 0.005857846699655056 2023-01-22 07:52:47.730667: step: 164/77, loss: 0.05243443325161934 2023-01-22 07:52:49.040584: step: 168/77, loss: 0.007364882621914148 2023-01-22 07:52:50.307102: step: 172/77, loss: 0.007191166281700134 2023-01-22 07:52:51.647719: step: 176/77, loss: 0.09894745796918869 2023-01-22 07:52:52.978363: step: 180/77, loss: 0.030295606702566147 2023-01-22 07:52:54.315548: step: 184/77, loss: 0.0666690245270729 2023-01-22 07:52:55.653582: step: 188/77, loss: 0.02367621660232544 2023-01-22 07:52:56.943001: step: 192/77, loss: 0.04850950092077255 2023-01-22 07:52:58.259692: step: 196/77, loss: 0.007937368005514145 2023-01-22 07:52:59.615193: step: 200/77, loss: 0.004608646966516972 2023-01-22 07:53:00.880866: step: 204/77, loss: 0.1033419519662857 2023-01-22 07:53:02.180561: step: 208/77, loss: 0.07606571912765503 2023-01-22 07:53:03.492135: step: 212/77, loss: 0.010289874859154224 2023-01-22 07:53:04.774465: step: 216/77, loss: 0.2712690234184265 2023-01-22 07:53:06.027738: step: 220/77, loss: 0.01715884730219841 2023-01-22 07:53:07.360659: step: 224/77, loss: 0.009004155173897743 2023-01-22 07:53:08.655772: step: 228/77, loss: 0.03872823715209961 2023-01-22 07:53:10.023176: step: 232/77, loss: 0.08715996891260147 2023-01-22 07:53:11.326696: step: 236/77, loss: 0.0037528513930737972 2023-01-22 07:53:12.640858: step: 240/77, loss: 0.028906524181365967 2023-01-22 07:53:14.003752: step: 244/77, loss: 0.013030106201767921 2023-01-22 07:53:15.342998: step: 248/77, loss: 0.03233833983540535 2023-01-22 07:53:16.646481: step: 252/77, loss: 0.05106557160615921 2023-01-22 07:53:17.943009: step: 256/77, loss: 0.013097782619297504 2023-01-22 07:53:19.226136: step: 260/77, loss: 0.01939336583018303 2023-01-22 07:53:20.527696: step: 264/77, loss: 0.03218713402748108 2023-01-22 07:53:21.815158: step: 268/77, loss: 0.03535167872905731 2023-01-22 07:53:23.134721: step: 272/77, loss: 0.017765024676918983 2023-01-22 07:53:24.444930: step: 276/77, loss: 0.062223754823207855 2023-01-22 07:53:25.753386: step: 280/77, loss: 0.014603447169065475 2023-01-22 07:53:27.033585: step: 284/77, loss: 
0.03279239684343338 2023-01-22 07:53:28.304003: step: 288/77, loss: 0.03463967889547348 2023-01-22 07:53:29.585365: step: 292/77, loss: 0.08478177338838577 2023-01-22 07:53:30.948663: step: 296/77, loss: 0.002551364479586482 2023-01-22 07:53:32.254758: step: 300/77, loss: 0.006090979091823101 2023-01-22 07:53:33.564625: step: 304/77, loss: 0.0030389754101634026 2023-01-22 07:53:34.933580: step: 308/77, loss: 0.05313324183225632 2023-01-22 07:53:36.246946: step: 312/77, loss: 0.021794088184833527 2023-01-22 07:53:37.540833: step: 316/77, loss: 0.02054758369922638 2023-01-22 07:53:38.854632: step: 320/77, loss: 0.041535671800374985 2023-01-22 07:53:40.184634: step: 324/77, loss: 0.025039827451109886 2023-01-22 07:53:41.520333: step: 328/77, loss: 0.023139623925089836 2023-01-22 07:53:42.814038: step: 332/77, loss: 0.01770506612956524 2023-01-22 07:53:44.147084: step: 336/77, loss: 0.0133819580078125 2023-01-22 07:53:45.451948: step: 340/77, loss: 0.10492324829101562 2023-01-22 07:53:46.804294: step: 344/77, loss: 0.04167582839727402 2023-01-22 07:53:48.102206: step: 348/77, loss: 0.07900797575712204 2023-01-22 07:53:49.419784: step: 352/77, loss: 0.00408164644613862 2023-01-22 07:53:50.782247: step: 356/77, loss: 0.04344362020492554 2023-01-22 07:53:52.129027: step: 360/77, loss: 0.0015304482076317072 2023-01-22 07:53:53.412206: step: 364/77, loss: 0.005748497322201729 2023-01-22 07:53:54.779575: step: 368/77, loss: 0.10104496031999588 2023-01-22 07:53:56.089915: step: 372/77, loss: 0.02256450429558754 2023-01-22 07:53:57.335593: step: 376/77, loss: 0.0722183957695961 2023-01-22 07:53:58.628418: step: 380/77, loss: 0.041157275438308716 2023-01-22 07:53:59.924030: step: 384/77, loss: 0.10450397431850433 2023-01-22 07:54:01.267045: step: 388/77, loss: 0.008872132748365402 ================================================== Loss: 0.043 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 3} Test Chinese: {'template': {'p': 0.9655172413793104, 'r': 0.4375, 'f1': 0.6021505376344085}, 'slot': {'p': 0.7, 'r': 0.01282051282051282, 'f1': 0.025179856115107917}, 'combined': 0.015162063897269281, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 3} Test Korean: {'template': {'p': 0.9642857142857143, 'r': 0.421875, 'f1': 0.5869565217391304}, 'slot': {'p': 0.7142857142857143, 'r': 0.013736263736263736, 'f1': 0.026954177897574125}, 'combined': 0.015820930505097856, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 3} Test Russian: {'template': {'p': 0.9642857142857143, 'r': 0.421875, 'f1': 0.5869565217391304}, 'slot': {'p': 0.7, 'r': 0.01282051282051282, 'f1': 0.025179856115107917}, 'combined': 0.014779480763215514, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 
'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} New best chinese model... New best korean model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 3} Test for Chinese: {'template': {'p': 0.9655172413793104, 'r': 0.4375, 'f1': 0.6021505376344085}, 'slot': {'p': 0.7, 'r': 0.01282051282051282, 'f1': 0.025179856115107917}, 'combined': 0.015162063897269281, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 3} Test for Korean: {'template': {'p': 0.9642857142857143, 'r': 0.421875, 'f1': 0.5869565217391304}, 'slot': {'p': 0.7142857142857143, 'r': 0.013736263736263736, 'f1': 0.026954177897574125}, 'combined': 0.015820930505097856, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5833333333333334, 'f1': 0.7216494845360825}, 'slot': {'p': 0.45454545454545453, 'r': 0.03780718336483932, 'f1': 0.06980802792321117}, 'combined': 0.05037692736726579, 'epoch': 2} Test for Russian: {'template': {'p': 0.7375, 'r': 0.4609375, 'f1': 0.5673076923076923}, 'slot': {'p': 0.26666666666666666, 'r': 0.01098901098901099, 'f1': 0.021108179419525065}, 'combined': 0.011974832555307489, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 4 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:56:01.694985: step: 4/77, loss: 0.03197958320379257 2023-01-22 07:56:02.985944: step: 8/77, loss: 0.012238221243023872 2023-01-22 07:56:04.247106: step: 12/77, loss: 0.012658842839300632 2023-01-22 07:56:05.545580: step: 16/77, loss: 0.05696278437972069 2023-01-22 07:56:06.873492: step: 20/77, loss: 0.06434786319732666 2023-01-22 07:56:08.237421: step: 24/77, loss: 0.027839547023177147 2023-01-22 07:56:09.594514: step: 28/77, loss: 0.004860618617385626 2023-01-22 07:56:10.975335: step: 32/77, loss: 0.09483452886343002 2023-01-22 07:56:12.260172: step: 36/77, loss: 0.019608458504080772 2023-01-22 07:56:13.551297: step: 40/77, loss: 0.01649227924644947 2023-01-22 07:56:14.895119: step: 44/77, loss: 0.07741818577051163 2023-01-22 07:56:16.223405: step: 48/77, loss: 0.043329864740371704 2023-01-22 07:56:17.542010: step: 52/77, loss: 0.03802216053009033 2023-01-22 07:56:18.853377: step: 56/77, loss: 0.14712420105934143 2023-01-22 07:56:20.145419: step: 60/77, loss: 0.021073415875434875 2023-01-22 07:56:21.418920: step: 64/77, loss: 0.10768307000398636 2023-01-22 07:56:22.727922: step: 68/77, loss: 0.006619085557758808 2023-01-22 07:56:24.042905: step: 72/77, loss: 
0.025189127773046494 2023-01-22 07:56:25.350896: step: 76/77, loss: 0.01855679415166378 2023-01-22 07:56:26.691344: step: 80/77, loss: 0.008611946366727352 2023-01-22 07:56:27.975550: step: 84/77, loss: 0.004469956737011671 2023-01-22 07:56:29.269758: step: 88/77, loss: 0.018613183870911598 2023-01-22 07:56:30.599662: step: 92/77, loss: 0.16133186221122742 2023-01-22 07:56:31.889250: step: 96/77, loss: 0.008658718317747116 2023-01-22 07:56:33.233614: step: 100/77, loss: 0.020171891897916794 2023-01-22 07:56:34.553567: step: 104/77, loss: 0.009985811077058315 2023-01-22 07:56:35.897345: step: 108/77, loss: 0.03879367187619209 2023-01-22 07:56:37.203842: step: 112/77, loss: 0.05209491029381752 2023-01-22 07:56:38.477195: step: 116/77, loss: 0.007992057129740715 2023-01-22 07:56:39.764439: step: 120/77, loss: 0.022754168137907982 2023-01-22 07:56:41.120397: step: 124/77, loss: 0.017588071525096893 2023-01-22 07:56:42.450215: step: 128/77, loss: 0.025300100445747375 2023-01-22 07:56:43.759140: step: 132/77, loss: 0.008288135752081871 2023-01-22 07:56:45.050265: step: 136/77, loss: 0.01869431883096695 2023-01-22 07:56:46.314275: step: 140/77, loss: 0.07826438546180725 2023-01-22 07:56:47.613779: step: 144/77, loss: 0.018914898857474327 2023-01-22 07:56:48.925629: step: 148/77, loss: 0.14493244886398315 2023-01-22 07:56:50.200676: step: 152/77, loss: 0.0015709679573774338 2023-01-22 07:56:51.531429: step: 156/77, loss: 0.12118136137723923 2023-01-22 07:56:52.878351: step: 160/77, loss: 0.03633274883031845 2023-01-22 07:56:54.216248: step: 164/77, loss: 0.018765099346637726 2023-01-22 07:56:55.598865: step: 168/77, loss: 0.0025905310176312923 2023-01-22 07:56:56.936863: step: 172/77, loss: 0.0021830382756888866 2023-01-22 07:56:58.228450: step: 176/77, loss: 0.009825218468904495 2023-01-22 07:56:59.502868: step: 180/77, loss: 0.016480347141623497 2023-01-22 07:57:00.787192: step: 184/77, loss: 0.03498028963804245 2023-01-22 07:57:02.084662: step: 188/77, loss: 0.003769653383642435 2023-01-22 07:57:03.363702: step: 192/77, loss: 0.025297515094280243 2023-01-22 07:57:04.742918: step: 196/77, loss: 0.019557366147637367 2023-01-22 07:57:06.080127: step: 200/77, loss: 0.018995683640241623 2023-01-22 07:57:07.426550: step: 204/77, loss: 0.0028202475514262915 2023-01-22 07:57:08.681676: step: 208/77, loss: 0.016043715178966522 2023-01-22 07:57:10.031359: step: 212/77, loss: 0.06249503046274185 2023-01-22 07:57:11.392422: step: 216/77, loss: 0.015046648681163788 2023-01-22 07:57:12.737570: step: 220/77, loss: 0.013907751999795437 2023-01-22 07:57:14.043066: step: 224/77, loss: 0.045797016471624374 2023-01-22 07:57:15.373312: step: 228/77, loss: 0.07333850115537643 2023-01-22 07:57:16.750969: step: 232/77, loss: 0.010844893753528595 2023-01-22 07:57:18.073993: step: 236/77, loss: 0.03742639720439911 2023-01-22 07:57:19.394724: step: 240/77, loss: 0.05061040818691254 2023-01-22 07:57:20.731537: step: 244/77, loss: 0.02508923038840294 2023-01-22 07:57:22.092974: step: 248/77, loss: 0.060133058577775955 2023-01-22 07:57:23.446921: step: 252/77, loss: 0.04423247650265694 2023-01-22 07:57:24.722283: step: 256/77, loss: 0.04805321246385574 2023-01-22 07:57:26.040246: step: 260/77, loss: 0.05071249231696129 2023-01-22 07:57:27.358838: step: 264/77, loss: 0.01529052946716547 2023-01-22 07:57:28.685955: step: 268/77, loss: 0.001533987233415246 2023-01-22 07:57:29.995542: step: 272/77, loss: 0.059901487082242966 2023-01-22 07:57:31.357214: step: 276/77, loss: 0.004801694769412279 2023-01-22 07:57:32.670377: step: 
280/77, loss: 0.029555046930909157 2023-01-22 07:57:33.992048: step: 284/77, loss: 0.014943293295800686 2023-01-22 07:57:35.303267: step: 288/77, loss: 0.12821191549301147 2023-01-22 07:57:36.659964: step: 292/77, loss: 0.12393166869878769 2023-01-22 07:57:37.978675: step: 296/77, loss: 0.005087250843644142 2023-01-22 07:57:39.295032: step: 300/77, loss: 0.06435203552246094 2023-01-22 07:57:40.577061: step: 304/77, loss: 0.10260597616434097 2023-01-22 07:57:41.902750: step: 308/77, loss: 0.017721746116876602 2023-01-22 07:57:43.211603: step: 312/77, loss: 0.05021507292985916 2023-01-22 07:57:44.472343: step: 316/77, loss: 0.029303917661309242 2023-01-22 07:57:45.801011: step: 320/77, loss: 0.1269276887178421 2023-01-22 07:57:47.154226: step: 324/77, loss: 0.06324280053377151 2023-01-22 07:57:48.431967: step: 328/77, loss: 0.029106508940458298 2023-01-22 07:57:49.754619: step: 332/77, loss: 0.04430750384926796 2023-01-22 07:57:51.069434: step: 336/77, loss: 0.05504322797060013 2023-01-22 07:57:52.395852: step: 340/77, loss: 0.02148296684026718 2023-01-22 07:57:53.756676: step: 344/77, loss: 0.035367049276828766 2023-01-22 07:57:55.074403: step: 348/77, loss: 0.019233345985412598 2023-01-22 07:57:56.382192: step: 352/77, loss: 0.008263535797595978 2023-01-22 07:57:57.714593: step: 356/77, loss: 0.044725432991981506 2023-01-22 07:57:59.009106: step: 360/77, loss: 0.012253060936927795 2023-01-22 07:58:00.306506: step: 364/77, loss: 0.01013081893324852 2023-01-22 07:58:01.625547: step: 368/77, loss: 0.044951606541872025 2023-01-22 07:58:02.929982: step: 372/77, loss: 0.04763204604387283 2023-01-22 07:58:04.229957: step: 376/77, loss: 0.05810655653476715 2023-01-22 07:58:05.491497: step: 380/77, loss: 0.02208855375647545 2023-01-22 07:58:06.820704: step: 384/77, loss: 0.008081790059804916 2023-01-22 07:58:08.107849: step: 388/77, loss: 0.016579212620854378 ================================================== Loss: 0.038 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 
'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 5 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:00:13.381070: step: 4/77, loss: 0.019590124487876892 2023-01-22 08:00:14.709492: step: 8/77, loss: 0.023027315735816956 2023-01-22 08:00:15.995396: step: 12/77, loss: 0.029082786291837692 2023-01-22 08:00:17.311566: step: 16/77, loss: 0.026010677218437195 2023-01-22 08:00:18.654847: step: 20/77, loss: 0.012876695021986961 2023-01-22 08:00:19.910201: step: 24/77, loss: 0.0071772001683712006 2023-01-22 08:00:21.177670: step: 28/77, loss: 0.013369128108024597 2023-01-22 08:00:22.477404: step: 32/77, loss: 0.00957412552088499 2023-01-22 08:00:23.797259: step: 36/77, loss: 0.009424794465303421 2023-01-22 08:00:25.080160: step: 40/77, loss: 0.02984057366847992 2023-01-22 08:00:26.419887: step: 44/77, loss: 0.07836680859327316 2023-01-22 08:00:27.778091: step: 48/77, loss: 0.04779861867427826 2023-01-22 08:00:29.112993: step: 52/77, loss: 0.02072271518409252 2023-01-22 08:00:30.430120: step: 56/77, loss: 0.028426162898540497 2023-01-22 08:00:31.734012: step: 60/77, loss: 0.020091162994503975 2023-01-22 08:00:33.040748: step: 64/77, loss: 
0.03691019117832184 2023-01-22 08:00:34.329984: step: 68/77, loss: 0.08650784194469452 2023-01-22 08:00:35.651130: step: 72/77, loss: 0.08476677536964417 2023-01-22 08:00:36.971817: step: 76/77, loss: 0.021942488849163055 2023-01-22 08:00:38.296965: step: 80/77, loss: 0.010857520624995232 2023-01-22 08:00:39.626180: step: 84/77, loss: 0.03199823573231697 2023-01-22 08:00:41.007561: step: 88/77, loss: 0.036400698125362396 2023-01-22 08:00:42.309619: step: 92/77, loss: 0.058628231287002563 2023-01-22 08:00:43.595811: step: 96/77, loss: 0.03350096940994263 2023-01-22 08:00:44.875682: step: 100/77, loss: 0.054468218237161636 2023-01-22 08:00:46.191850: step: 104/77, loss: 0.007558372337371111 2023-01-22 08:00:47.538775: step: 108/77, loss: 0.014154444448649883 2023-01-22 08:00:48.842158: step: 112/77, loss: 0.00751089584082365 2023-01-22 08:00:50.206548: step: 116/77, loss: 0.00582252349704504 2023-01-22 08:00:51.504392: step: 120/77, loss: 0.010972294956445694 2023-01-22 08:00:52.807809: step: 124/77, loss: 0.021045461297035217 2023-01-22 08:00:54.126275: step: 128/77, loss: 0.0017005748813971877 2023-01-22 08:00:55.455152: step: 132/77, loss: 0.04809816554188728 2023-01-22 08:00:56.749146: step: 136/77, loss: 0.010070187970995903 2023-01-22 08:00:58.027769: step: 140/77, loss: 0.00459901150316 2023-01-22 08:00:59.332313: step: 144/77, loss: 0.010928267613053322 2023-01-22 08:01:00.590699: step: 148/77, loss: 0.018509894609451294 2023-01-22 08:01:01.902354: step: 152/77, loss: 0.014490798115730286 2023-01-22 08:01:03.174305: step: 156/77, loss: 0.003958669491112232 2023-01-22 08:01:04.497659: step: 160/77, loss: 0.01771484687924385 2023-01-22 08:01:05.781905: step: 164/77, loss: 0.008731606416404247 2023-01-22 08:01:07.151715: step: 168/77, loss: 0.03572859242558479 2023-01-22 08:01:08.515481: step: 172/77, loss: 0.020078785717487335 2023-01-22 08:01:09.799497: step: 176/77, loss: 0.002374204806983471 2023-01-22 08:01:11.120339: step: 180/77, loss: 0.013053692877292633 2023-01-22 08:01:12.488103: step: 184/77, loss: 0.04168858379125595 2023-01-22 08:01:13.869269: step: 188/77, loss: 0.030219757929444313 2023-01-22 08:01:15.178574: step: 192/77, loss: 0.08529096841812134 2023-01-22 08:01:16.471735: step: 196/77, loss: 0.03557312861084938 2023-01-22 08:01:17.825762: step: 200/77, loss: 0.0012289071455597878 2023-01-22 08:01:19.140208: step: 204/77, loss: 0.028531398624181747 2023-01-22 08:01:20.470508: step: 208/77, loss: 0.0033116433769464493 2023-01-22 08:01:21.812361: step: 212/77, loss: 0.0015021846629679203 2023-01-22 08:01:23.157882: step: 216/77, loss: 0.07549797743558884 2023-01-22 08:01:24.496915: step: 220/77, loss: 0.062390752136707306 2023-01-22 08:01:25.795770: step: 224/77, loss: 0.03732169046998024 2023-01-22 08:01:27.079076: step: 228/77, loss: 0.000875171332154423 2023-01-22 08:01:28.451352: step: 232/77, loss: 0.07455716282129288 2023-01-22 08:01:29.770060: step: 236/77, loss: 0.0032223116140812635 2023-01-22 08:01:31.081349: step: 240/77, loss: 0.002118849428370595 2023-01-22 08:01:32.408473: step: 244/77, loss: 0.012293724343180656 2023-01-22 08:01:33.696787: step: 248/77, loss: 0.06768789142370224 2023-01-22 08:01:35.006881: step: 252/77, loss: 0.08260074257850647 2023-01-22 08:01:36.366497: step: 256/77, loss: 0.09293807297945023 2023-01-22 08:01:37.691127: step: 260/77, loss: 0.031117822974920273 2023-01-22 08:01:38.961291: step: 264/77, loss: 0.003587220562621951 2023-01-22 08:01:40.242067: step: 268/77, loss: 0.01646401360630989 2023-01-22 08:01:41.574249: step: 272/77, 
loss: 0.0020924354903399944 2023-01-22 08:01:42.895057: step: 276/77, loss: 0.0006436010007746518 2023-01-22 08:01:44.226019: step: 280/77, loss: 0.005888496059924364 2023-01-22 08:01:45.585487: step: 284/77, loss: 0.003980573266744614 2023-01-22 08:01:46.932562: step: 288/77, loss: 0.040291935205459595 2023-01-22 08:01:48.243382: step: 292/77, loss: 0.004146946594119072 2023-01-22 08:01:49.521196: step: 296/77, loss: 0.029858361929655075 2023-01-22 08:01:50.880417: step: 300/77, loss: 0.09856970608234406 2023-01-22 08:01:52.236681: step: 304/77, loss: 0.049976229667663574 2023-01-22 08:01:53.569885: step: 308/77, loss: 0.07172711193561554 2023-01-22 08:01:54.908521: step: 312/77, loss: 0.03535992652177811 2023-01-22 08:01:56.202879: step: 316/77, loss: 0.005112402141094208 2023-01-22 08:01:57.515826: step: 320/77, loss: 0.0004511027073021978 2023-01-22 08:01:58.805455: step: 324/77, loss: 0.02633637934923172 2023-01-22 08:02:00.141062: step: 328/77, loss: 0.005359982140362263 2023-01-22 08:02:01.514394: step: 332/77, loss: 0.0010264780139550567 2023-01-22 08:02:02.814102: step: 336/77, loss: 0.051413118839263916 2023-01-22 08:02:04.116212: step: 340/77, loss: 0.005643540993332863 2023-01-22 08:02:05.443747: step: 344/77, loss: 0.09154492616653442 2023-01-22 08:02:06.807362: step: 348/77, loss: 0.31316933035850525 2023-01-22 08:02:08.131657: step: 352/77, loss: 0.0036383122205734253 2023-01-22 08:02:09.428095: step: 356/77, loss: 0.007959169335663319 2023-01-22 08:02:10.754004: step: 360/77, loss: 0.021040400490164757 2023-01-22 08:02:12.109968: step: 364/77, loss: 0.00927928276360035 2023-01-22 08:02:13.446567: step: 368/77, loss: 0.01899535395205021 2023-01-22 08:02:14.731326: step: 372/77, loss: 0.018808195367455482 2023-01-22 08:02:16.019867: step: 376/77, loss: 0.0040769632905721664 2023-01-22 08:02:17.372474: step: 380/77, loss: 0.0692996233701706 2023-01-22 08:02:18.660216: step: 384/77, loss: 0.011889062821865082 2023-01-22 08:02:20.020673: step: 388/77, loss: 0.20114099979400635 ================================================== Loss: 0.032 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Chinese: {'template': {'p': 0.9841269841269841, 'r': 0.484375, 'f1': 0.6492146596858638}, 'slot': {'p': 0.5555555555555556, 'r': 0.009157509157509158, 'f1': 0.01801801801801802}, 'combined': 0.011697561435781332, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Korean: {'template': {'p': 0.9841269841269841, 'r': 0.484375, 'f1': 0.6492146596858638}, 'slot': {'p': 0.5555555555555556, 'r': 0.009157509157509158, 'f1': 0.01801801801801802}, 'combined': 0.011697561435781332, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Russian: {'template': {'p': 0.984375, 'r': 0.4921875, 'f1': 0.65625}, 'slot': {'p': 0.5555555555555556, 'r': 0.009157509157509158, 'f1': 0.01801801801801802}, 'combined': 0.011824324324324327, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 
'combined': 0.03603603603603603, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 6 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:04:08.046415: step: 4/77, loss: 0.023742811754345894 2023-01-22 08:04:09.328681: step: 8/77, loss: 0.0024210514966398478 2023-01-22 08:04:10.616740: step: 12/77, loss: 0.031761955469846725 2023-01-22 08:04:11.886986: step: 16/77, loss: 0.023868165910243988 2023-01-22 08:04:13.226250: step: 20/77, loss: 0.0014466585125774145 2023-01-22 08:04:14.517874: step: 24/77, loss: 0.02680317685008049 2023-01-22 08:04:15.825469: step: 28/77, loss: 0.0022497575264424086 2023-01-22 08:04:17.130585: step: 32/77, loss: 0.05933031439781189 2023-01-22 08:04:18.435563: step: 36/77, loss: 0.0024521953891962767 2023-01-22 08:04:19.709466: step: 40/77, loss: 0.003981100395321846 2023-01-22 08:04:20.991890: step: 44/77, loss: 0.06482931226491928 2023-01-22 08:04:22.357745: step: 48/77, loss: 0.03707173466682434 2023-01-22 08:04:23.683111: step: 52/77, loss: 0.006385110784322023 2023-01-22 08:04:24.980021: step: 56/77, loss: 0.03251034766435623 2023-01-22 08:04:26.249034: step: 60/77, 
loss: 0.01326783001422882 2023-01-22 08:04:27.575173: step: 64/77, loss: 0.03662087395787239 2023-01-22 08:04:28.852367: step: 68/77, loss: 0.022754115983843803 2023-01-22 08:04:30.186180: step: 72/77, loss: 0.007800246123224497 2023-01-22 08:04:31.543898: step: 76/77, loss: 0.037100229412317276 2023-01-22 08:04:32.830831: step: 80/77, loss: 0.002311698393896222 2023-01-22 08:04:34.170965: step: 84/77, loss: 0.0027715619653463364 2023-01-22 08:04:35.449818: step: 88/77, loss: 0.15742728114128113 2023-01-22 08:04:36.767240: step: 92/77, loss: 0.016606274992227554 2023-01-22 08:04:38.058957: step: 96/77, loss: 0.013384426012635231 2023-01-22 08:04:39.465513: step: 100/77, loss: 0.032797813415527344 2023-01-22 08:04:40.818822: step: 104/77, loss: 0.008585028350353241 2023-01-22 08:04:42.033641: step: 108/77, loss: 0.02555439993739128 2023-01-22 08:04:43.379171: step: 112/77, loss: 0.009502370841801167 2023-01-22 08:04:44.704596: step: 116/77, loss: 0.0031764251179993153 2023-01-22 08:04:46.032664: step: 120/77, loss: 0.002601939719170332 2023-01-22 08:04:47.345069: step: 124/77, loss: 0.0356743261218071 2023-01-22 08:04:48.691789: step: 128/77, loss: 0.015332273207604885 2023-01-22 08:04:50.057517: step: 132/77, loss: 0.012819968163967133 2023-01-22 08:04:51.387344: step: 136/77, loss: 0.030379636213183403 2023-01-22 08:04:52.699710: step: 140/77, loss: 0.00024762097746133804 2023-01-22 08:04:54.006581: step: 144/77, loss: 0.0017826403491199017 2023-01-22 08:04:55.306960: step: 148/77, loss: 0.025525454431772232 2023-01-22 08:04:56.642700: step: 152/77, loss: 0.009922852739691734 2023-01-22 08:04:57.959254: step: 156/77, loss: 0.030485069379210472 2023-01-22 08:04:59.311365: step: 160/77, loss: 0.015530181117355824 2023-01-22 08:05:00.659224: step: 164/77, loss: 0.05367741733789444 2023-01-22 08:05:01.962845: step: 168/77, loss: 0.0014602728188037872 2023-01-22 08:05:03.277524: step: 172/77, loss: 0.09227390587329865 2023-01-22 08:05:04.574558: step: 176/77, loss: 0.0007054744055494666 2023-01-22 08:05:05.902331: step: 180/77, loss: 0.09929867088794708 2023-01-22 08:05:07.231724: step: 184/77, loss: 0.002446091268211603 2023-01-22 08:05:08.571809: step: 188/77, loss: 0.04151029884815216 2023-01-22 08:05:09.924613: step: 192/77, loss: 0.04282946512103081 2023-01-22 08:05:11.261962: step: 196/77, loss: 0.12917132675647736 2023-01-22 08:05:12.603266: step: 200/77, loss: 0.07418958842754364 2023-01-22 08:05:13.936809: step: 204/77, loss: 0.10528351366519928 2023-01-22 08:05:15.232430: step: 208/77, loss: 0.05371832475066185 2023-01-22 08:05:16.571021: step: 212/77, loss: 0.013339102268218994 2023-01-22 08:05:17.907010: step: 216/77, loss: 0.041499387472867966 2023-01-22 08:05:19.187564: step: 220/77, loss: 0.03752455860376358 2023-01-22 08:05:20.528800: step: 224/77, loss: 0.019607428461313248 2023-01-22 08:05:21.867748: step: 228/77, loss: 0.014202874153852463 2023-01-22 08:05:23.176788: step: 232/77, loss: 0.05125613883137703 2023-01-22 08:05:24.514850: step: 236/77, loss: 0.035229314118623734 2023-01-22 08:05:25.832315: step: 240/77, loss: 0.060807421803474426 2023-01-22 08:05:27.164575: step: 244/77, loss: 0.007476100232452154 2023-01-22 08:05:28.482997: step: 248/77, loss: 0.00987208727747202 2023-01-22 08:05:29.831014: step: 252/77, loss: 0.010732964612543583 2023-01-22 08:05:31.149886: step: 256/77, loss: 0.016423512250185013 2023-01-22 08:05:32.506667: step: 260/77, loss: 0.047358062118291855 2023-01-22 08:05:33.849702: step: 264/77, loss: 0.06571957468986511 2023-01-22 08:05:35.169071: 
step: 268/77, loss: 0.013337070122361183 2023-01-22 08:05:36.499765: step: 272/77, loss: 0.029291309416294098 2023-01-22 08:05:37.805870: step: 276/77, loss: 0.0034126536920666695 2023-01-22 08:05:39.132672: step: 280/77, loss: 0.014468264766037464 2023-01-22 08:05:40.530717: step: 284/77, loss: 0.03605595603585243 2023-01-22 08:05:41.833357: step: 288/77, loss: 0.017270473763346672 2023-01-22 08:05:43.156892: step: 292/77, loss: 0.01606428064405918 2023-01-22 08:05:44.514267: step: 296/77, loss: 0.01870177686214447 2023-01-22 08:05:45.837822: step: 300/77, loss: 0.01481685135513544 2023-01-22 08:05:47.181920: step: 304/77, loss: 0.014699216932058334 2023-01-22 08:05:48.484362: step: 308/77, loss: 0.011047665029764175 2023-01-22 08:05:49.872714: step: 312/77, loss: 0.01075662486255169 2023-01-22 08:05:51.205475: step: 316/77, loss: 0.029328038915991783 2023-01-22 08:05:52.543860: step: 320/77, loss: 0.01761351153254509 2023-01-22 08:05:53.846723: step: 324/77, loss: 0.012456723488867283 2023-01-22 08:05:55.135946: step: 328/77, loss: 0.020048733800649643 2023-01-22 08:05:56.536911: step: 332/77, loss: 0.05049295350909233 2023-01-22 08:05:57.843678: step: 336/77, loss: 0.04084024205803871 2023-01-22 08:05:59.205005: step: 340/77, loss: 0.00574206979945302 2023-01-22 08:06:00.499648: step: 344/77, loss: 0.06762515753507614 2023-01-22 08:06:01.803687: step: 348/77, loss: 0.017670320346951485 2023-01-22 08:06:03.115241: step: 352/77, loss: 0.021465200930833817 2023-01-22 08:06:04.484299: step: 356/77, loss: 0.03226961940526962 2023-01-22 08:06:05.819227: step: 360/77, loss: 0.042687464505434036 2023-01-22 08:06:07.141665: step: 364/77, loss: 0.011565061286091805 2023-01-22 08:06:08.480120: step: 368/77, loss: 0.10414603352546692 2023-01-22 08:06:09.826562: step: 372/77, loss: 0.00794203020632267 2023-01-22 08:06:11.209126: step: 376/77, loss: 0.036036621779203415 2023-01-22 08:06:12.534980: step: 380/77, loss: 0.0044304742477834225 2023-01-22 08:06:13.801983: step: 384/77, loss: 0.08746032416820526 2023-01-22 08:06:15.070239: step: 388/77, loss: 0.016607046127319336 ================================================== Loss: 0.029 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.45, 'f1': 0.6206896551724138}, 'slot': {'p': 0.5416666666666666, 'r': 0.024574669187145556, 'f1': 0.04701627486437612}, 'combined': 0.029182515433061045, 'epoch': 6} Test Chinese: {'template': {'p': 0.9838709677419355, 'r': 0.4765625, 'f1': 0.6421052631578947}, 'slot': {'p': 0.5263157894736842, 'r': 0.009157509157509158, 'f1': 0.018001800180018006}, 'combined': 0.011559050641906298, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5333333333333333, 'r': 0.030245746691871456, 'f1': 0.05724508050089446}, 'combined': 0.03816338700059631, 'epoch': 6} Test Korean: {'template': {'p': 0.9836065573770492, 'r': 0.46875, 'f1': 0.6349206349206349}, 'slot': {'p': 0.5555555555555556, 'r': 0.009157509157509158, 'f1': 0.01801801801801802}, 'combined': 0.011440011440011442, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.45, 'f1': 0.6206896551724138}, 'slot': {'p': 0.5384615384615384, 'r': 0.026465028355387523, 'f1': 0.05045045045045044}, 'combined': 0.03131407269338304, 'epoch': 6} Test Russian: {'template': {'p': 0.9838709677419355, 'r': 0.4765625, 'f1': 0.6421052631578947}, 'slot': {'p': 0.5555555555555556, 'r': 0.009157509157509158, 'f1': 0.01801801801801802}, 'combined': 0.01156946420104315, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 
0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 7 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:08:01.868364: step: 4/77, loss: 0.0031209909357130527 2023-01-22 08:08:03.166749: step: 8/77, loss: 0.08783739805221558 2023-01-22 08:08:04.456791: step: 12/77, loss: 0.0652967095375061 2023-01-22 08:08:05.744781: step: 16/77, loss: 0.009294316172599792 2023-01-22 08:08:07.068718: step: 20/77, loss: 0.01650993712246418 2023-01-22 08:08:08.347461: step: 24/77, loss: 0.006153582129627466 2023-01-22 08:08:09.680285: step: 28/77, loss: 0.005866794381290674 2023-01-22 08:08:11.035142: step: 32/77, loss: 0.005742167588323355 2023-01-22 08:08:12.347857: step: 36/77, loss: 0.01967783458530903 2023-01-22 08:08:13.650065: step: 40/77, loss: 0.014993395656347275 2023-01-22 08:08:14.962089: step: 44/77, loss: 0.019814079627394676 2023-01-22 08:08:16.307304: step: 48/77, loss: 0.010505028069019318 2023-01-22 08:08:17.626310: step: 52/77, loss: 0.03083161637187004 2023-01-22 08:08:18.908924: step: 56/77, loss: 0.004766981117427349 2023-01-22 
08:08:20.258479: step: 60/77, loss: 0.002936827950179577 2023-01-22 08:08:21.586345: step: 64/77, loss: 0.0015104500344023108 2023-01-22 08:08:22.853976: step: 68/77, loss: 0.001246319618076086 2023-01-22 08:08:24.176514: step: 72/77, loss: 0.00507638044655323 2023-01-22 08:08:25.497667: step: 76/77, loss: 0.021538907662034035 2023-01-22 08:08:26.745618: step: 80/77, loss: 0.07426765561103821 2023-01-22 08:08:28.025682: step: 84/77, loss: 0.0063897836953401566 2023-01-22 08:08:29.303529: step: 88/77, loss: 0.011027699336409569 2023-01-22 08:08:30.681698: step: 92/77, loss: 0.001949557103216648 2023-01-22 08:08:32.012393: step: 96/77, loss: 0.0005657103611156344 2023-01-22 08:08:33.319248: step: 100/77, loss: 0.05404621735215187 2023-01-22 08:08:34.592426: step: 104/77, loss: 0.02471388503909111 2023-01-22 08:08:35.881833: step: 108/77, loss: 0.04665075242519379 2023-01-22 08:08:37.227361: step: 112/77, loss: 0.0053888545371592045 2023-01-22 08:08:38.501280: step: 116/77, loss: 0.07282565534114838 2023-01-22 08:08:39.807268: step: 120/77, loss: 0.0022747742477804422 2023-01-22 08:08:41.141715: step: 124/77, loss: 0.07615106552839279 2023-01-22 08:08:42.458312: step: 128/77, loss: 0.022499775514006615 2023-01-22 08:08:43.727063: step: 132/77, loss: 0.02364405244588852 2023-01-22 08:08:45.029868: step: 136/77, loss: 0.05657701939344406 2023-01-22 08:08:46.315747: step: 140/77, loss: 0.0018406548770144582 2023-01-22 08:08:47.657528: step: 144/77, loss: 0.05552219972014427 2023-01-22 08:08:48.943293: step: 148/77, loss: 0.018158987164497375 2023-01-22 08:08:50.245149: step: 152/77, loss: 0.009586824104189873 2023-01-22 08:08:51.560743: step: 156/77, loss: 0.008389465510845184 2023-01-22 08:08:52.862021: step: 160/77, loss: 0.04054231569170952 2023-01-22 08:08:54.197905: step: 164/77, loss: 0.020485740154981613 2023-01-22 08:08:55.508986: step: 168/77, loss: 0.024340663105249405 2023-01-22 08:08:56.806409: step: 172/77, loss: 0.009585918858647346 2023-01-22 08:08:58.129178: step: 176/77, loss: 0.018610753118991852 2023-01-22 08:08:59.445112: step: 180/77, loss: 0.016117611899971962 2023-01-22 08:09:00.748968: step: 184/77, loss: 0.06756845861673355 2023-01-22 08:09:02.072065: step: 188/77, loss: 0.019038718193769455 2023-01-22 08:09:03.388533: step: 192/77, loss: 0.04110538959503174 2023-01-22 08:09:04.712436: step: 196/77, loss: 0.03693665936589241 2023-01-22 08:09:06.090474: step: 200/77, loss: 0.021302184090018272 2023-01-22 08:09:07.413118: step: 204/77, loss: 0.002271834993734956 2023-01-22 08:09:08.741416: step: 208/77, loss: 0.043990932404994965 2023-01-22 08:09:10.065931: step: 212/77, loss: 0.021895771846175194 2023-01-22 08:09:11.406575: step: 216/77, loss: 0.007647061720490456 2023-01-22 08:09:12.767029: step: 220/77, loss: 0.13254640996456146 2023-01-22 08:09:14.075889: step: 224/77, loss: 0.027036642655730247 2023-01-22 08:09:15.399312: step: 228/77, loss: 0.04646734148263931 2023-01-22 08:09:16.713405: step: 232/77, loss: 0.0006018686690367758 2023-01-22 08:09:18.022894: step: 236/77, loss: 0.011303732171654701 2023-01-22 08:09:19.384290: step: 240/77, loss: 0.015093307942152023 2023-01-22 08:09:20.707256: step: 244/77, loss: 0.0010103249223902822 2023-01-22 08:09:22.022893: step: 248/77, loss: 0.00565328449010849 2023-01-22 08:09:23.324516: step: 252/77, loss: 0.02016383968293667 2023-01-22 08:09:24.620137: step: 256/77, loss: 0.02276289090514183 2023-01-22 08:09:25.924293: step: 260/77, loss: 0.11638316512107849 2023-01-22 08:09:27.237555: step: 264/77, loss: 0.008772538974881172 
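Note: each line of the training stream above has the form "<timestamp>: step: <k>/77, loss: <value>", with the step counter advancing in multiples of 4 (matching --accumulate_step 4), and every epoch closes with a "Loss: x.xxx" summary. A rough sketch of a loop producing that output follows; train.py itself is not shown in this log, every name in the sketch is illustrative, and the end-of-epoch figure is assumed (not confirmed) to be the mean of the printed step losses.

```python
# Hedged sketch of the logging pattern visible above, not the actual train.py loop.
# Assumptions: the step counter counts micro-batches and a line is printed once per
# accumulation cycle; "Loss:" at the end of the epoch is the mean of the step losses.
from datetime import datetime

def train_one_epoch(micro_batches, compute_loss, accumulate_step=4, denom=77):
    step_losses = []
    for i, batch in enumerate(micro_batches, start=1):
        loss = compute_loss(batch)                 # stand-in for forward/backward
        if i % accumulate_step == 0:               # one line per optimizer update
            step_losses.append(loss)
            now = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
            print(f"{now}: step: {i}/{denom}, loss: {loss}")
    print("=" * 50)
    print(f"Loss: {sum(step_losses) / len(step_losses):.3f}")
```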
2023-01-22 08:09:28.542483: step: 268/77, loss: 0.00944832805544138 2023-01-22 08:09:29.858935: step: 272/77, loss: 0.004265233408659697 2023-01-22 08:09:31.159802: step: 276/77, loss: 0.0005268824170343578 2023-01-22 08:09:32.507717: step: 280/77, loss: 0.00032209057826548815 2023-01-22 08:09:33.863464: step: 284/77, loss: 0.001351947314105928 2023-01-22 08:09:35.219764: step: 288/77, loss: 0.0057523720897734165 2023-01-22 08:09:36.543469: step: 292/77, loss: 0.010685701854526997 2023-01-22 08:09:37.894893: step: 296/77, loss: 0.0319397896528244 2023-01-22 08:09:39.236243: step: 300/77, loss: 0.06816261261701584 2023-01-22 08:09:40.565540: step: 304/77, loss: 0.005405607167631388 2023-01-22 08:09:41.825296: step: 308/77, loss: 0.01193196326494217 2023-01-22 08:09:43.085371: step: 312/77, loss: 0.006270136684179306 2023-01-22 08:09:44.402196: step: 316/77, loss: 0.003654724918305874 2023-01-22 08:09:45.758542: step: 320/77, loss: 0.006270033307373524 2023-01-22 08:09:47.044413: step: 324/77, loss: 0.011288300156593323 2023-01-22 08:09:48.357503: step: 328/77, loss: 0.041433122009038925 2023-01-22 08:09:49.683528: step: 332/77, loss: 0.010194050148129463 2023-01-22 08:09:50.982584: step: 336/77, loss: 0.013408026657998562 2023-01-22 08:09:52.313888: step: 340/77, loss: 0.017604660242795944 2023-01-22 08:09:53.595541: step: 344/77, loss: 0.030220985412597656 2023-01-22 08:09:54.900152: step: 348/77, loss: 0.009208658710122108 2023-01-22 08:09:56.220468: step: 352/77, loss: 0.0023348366376012564 2023-01-22 08:09:57.534500: step: 356/77, loss: 0.012271026149392128 2023-01-22 08:09:58.839139: step: 360/77, loss: 0.015157275833189487 2023-01-22 08:10:00.186910: step: 364/77, loss: 0.0011631695087999105 2023-01-22 08:10:01.522688: step: 368/77, loss: 0.01727611944079399 2023-01-22 08:10:02.800213: step: 372/77, loss: 0.0032124074641615152 2023-01-22 08:10:04.116623: step: 376/77, loss: 0.0028875365387648344 2023-01-22 08:10:05.441048: step: 380/77, loss: 0.0026917150244116783 2023-01-22 08:10:06.764059: step: 384/77, loss: 0.02284029684960842 2023-01-22 08:10:08.131210: step: 388/77, loss: 0.021598536521196365 ================================================== Loss: 0.022 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 7} Test Chinese: {'template': {'p': 0.9545454545454546, 'r': 0.4921875, 'f1': 0.6494845360824743}, 'slot': {'p': 0.6842105263157895, 'r': 0.011904761904761904, 'f1': 0.023402340234023402}, 'combined': 0.015199458090138911, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 7} Test Korean: {'template': {'p': 0.9545454545454546, 'r': 0.4921875, 'f1': 0.6494845360824743}, 'slot': {'p': 0.6842105263157895, 'r': 0.011904761904761904, 'f1': 0.023402340234023402}, 'combined': 0.015199458090138911, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 7} Test Russian: {'template': {'p': 0.9545454545454546, 'r': 0.4921875, 'f1': 0.6494845360824743}, 'slot': {'p': 0.6842105263157895, 'r': 0.011904761904761904, 'f1': 0.023402340234023402}, 'combined': 0.015199458090138911, 'epoch': 7} Sample 
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 8 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:11:54.837878: step: 4/77, loss: 0.003462360007688403 2023-01-22 08:11:56.169052: step: 8/77, loss: 0.03158210217952728 2023-01-22 08:11:57.474305: step: 12/77, loss: 0.027815116569399834 2023-01-22 08:11:58.763191: step: 16/77, loss: 0.010638121515512466 2023-01-22 08:12:00.119420: step: 20/77, loss: 0.021807294338941574 2023-01-22 08:12:01.391557: step: 24/77, loss: 0.08031920343637466 2023-01-22 08:12:02.692351: step: 28/77, loss: 0.02158435806632042 2023-01-22 08:12:03.973813: step: 32/77, loss: 0.038904719054698944 2023-01-22 08:12:05.294988: step: 36/77, loss: 0.03839685022830963 2023-01-22 08:12:06.580506: step: 40/77, loss: 0.03866554796695709 2023-01-22 08:12:07.889594: step: 44/77, loss: 6.99927331879735e-05 2023-01-22 08:12:09.207026: step: 48/77, loss: 0.03688203543424606 2023-01-22 08:12:10.569757: step: 
52/77, loss: 0.004742838907986879 2023-01-22 08:12:11.897309: step: 56/77, loss: 0.028267303481698036 2023-01-22 08:12:13.187433: step: 60/77, loss: 0.09787152707576752 2023-01-22 08:12:14.491656: step: 64/77, loss: 0.007006022147834301 2023-01-22 08:12:15.857639: step: 68/77, loss: 0.009687730111181736 2023-01-22 08:12:17.175757: step: 72/77, loss: 0.0010902268113568425 2023-01-22 08:12:18.491320: step: 76/77, loss: 0.00017482312978245318 2023-01-22 08:12:19.752471: step: 80/77, loss: 0.0049322182312607765 2023-01-22 08:12:21.104158: step: 84/77, loss: 0.015547310933470726 2023-01-22 08:12:22.380978: step: 88/77, loss: 0.01092690508812666 2023-01-22 08:12:23.729109: step: 92/77, loss: 0.000868482340592891 2023-01-22 08:12:25.033701: step: 96/77, loss: 0.021374428644776344 2023-01-22 08:12:26.358389: step: 100/77, loss: 0.03838520497083664 2023-01-22 08:12:27.704028: step: 104/77, loss: 0.02330004796385765 2023-01-22 08:12:28.982793: step: 108/77, loss: 0.0010328067000955343 2023-01-22 08:12:30.291746: step: 112/77, loss: 0.041627462953329086 2023-01-22 08:12:31.641606: step: 116/77, loss: 0.016076091676950455 2023-01-22 08:12:32.941534: step: 120/77, loss: 0.0010974672622978687 2023-01-22 08:12:34.269058: step: 124/77, loss: 0.06853155046701431 2023-01-22 08:12:35.553980: step: 128/77, loss: 0.047840192914009094 2023-01-22 08:12:36.832732: step: 132/77, loss: 0.0012306292774155736 2023-01-22 08:12:38.211284: step: 136/77, loss: 0.01438758336007595 2023-01-22 08:12:39.537387: step: 140/77, loss: 0.03334816172719002 2023-01-22 08:12:40.850187: step: 144/77, loss: 0.0003137671446893364 2023-01-22 08:12:42.161595: step: 148/77, loss: 0.0004342859610915184 2023-01-22 08:12:43.443777: step: 152/77, loss: 0.032907724380493164 2023-01-22 08:12:44.792250: step: 156/77, loss: 0.07321220636367798 2023-01-22 08:12:46.089142: step: 160/77, loss: 0.008549018763005733 2023-01-22 08:12:47.438984: step: 164/77, loss: 0.00022818568686489016 2023-01-22 08:12:48.792930: step: 168/77, loss: 0.00012675569450948387 2023-01-22 08:12:50.094917: step: 172/77, loss: 0.04355047643184662 2023-01-22 08:12:51.409241: step: 176/77, loss: 0.015296213328838348 2023-01-22 08:12:52.742462: step: 180/77, loss: 0.00031988683622330427 2023-01-22 08:12:54.012378: step: 184/77, loss: 3.0125163902994245e-05 2023-01-22 08:12:55.319489: step: 188/77, loss: 0.0007129679434001446 2023-01-22 08:12:56.623525: step: 192/77, loss: 0.007171159144490957 2023-01-22 08:12:57.922328: step: 196/77, loss: 0.04945923015475273 2023-01-22 08:12:59.180768: step: 200/77, loss: 0.0023911334574222565 2023-01-22 08:13:00.535995: step: 204/77, loss: 0.06517422944307327 2023-01-22 08:13:01.864301: step: 208/77, loss: 0.022393066436052322 2023-01-22 08:13:03.191043: step: 212/77, loss: 0.014836644753813744 2023-01-22 08:13:04.515907: step: 216/77, loss: 0.019449369981884956 2023-01-22 08:13:05.886492: step: 220/77, loss: 0.0018990390235558152 2023-01-22 08:13:07.202308: step: 224/77, loss: 0.028624853119254112 2023-01-22 08:13:08.525859: step: 228/77, loss: 0.010941157117486 2023-01-22 08:13:09.821538: step: 232/77, loss: 0.005102076567709446 2023-01-22 08:13:11.154927: step: 236/77, loss: 0.06938213109970093 2023-01-22 08:13:12.501274: step: 240/77, loss: 0.034478601068258286 2023-01-22 08:13:13.802504: step: 244/77, loss: 0.03948065638542175 2023-01-22 08:13:15.156205: step: 248/77, loss: 0.026814214885234833 2023-01-22 08:13:16.564933: step: 252/77, loss: 0.036289360374212265 2023-01-22 08:13:17.847449: step: 256/77, loss: 0.003869938664138317 
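In every evaluation block above, the reported 'f1' values agree with the usual harmonic mean of the listed 'p' and 'r', and the 'combined' value agrees with the product of the template f1 and the slot f1 (for Dev Chinese at epoch 7: 0.7234042553191489 × 0.0670194003527337 ≈ 0.0484821194, the logged 'combined'). A small check of that arithmetic with values copied from the log; the function name is illustrative rather than taken from train.py.

def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall (0.0 when both are zero)."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

# Dev Chinese, epoch 7, values as reported above.
template_f1 = f1(1.0, 0.5666666666666667)    # ~0.7234042553191489
slot_f1 = f1(0.5, 0.035916824196597356)      # ~0.0670194003527337
print(template_f1 * slot_f1)                 # ~0.048482119404105226, the logged 'combined'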
2023-01-22 08:13:19.193015: step: 260/77, loss: 0.011877670884132385 2023-01-22 08:13:20.581810: step: 264/77, loss: 0.021154792979359627 2023-01-22 08:13:21.907932: step: 268/77, loss: 0.018810829147696495 2023-01-22 08:13:23.264654: step: 272/77, loss: 0.12980136275291443 2023-01-22 08:13:24.620785: step: 276/77, loss: 0.01171032339334488 2023-01-22 08:13:25.976951: step: 280/77, loss: 0.027855847030878067 2023-01-22 08:13:27.297751: step: 284/77, loss: 0.006162848323583603 2023-01-22 08:13:28.658738: step: 288/77, loss: 0.023323755711317062 2023-01-22 08:13:29.965811: step: 292/77, loss: 0.009866025298833847 2023-01-22 08:13:31.379205: step: 296/77, loss: 0.05878325179219246 2023-01-22 08:13:32.740139: step: 300/77, loss: 0.006912640295922756 2023-01-22 08:13:34.054489: step: 304/77, loss: 0.002645879751071334 2023-01-22 08:13:35.388445: step: 308/77, loss: 0.040564458817243576 2023-01-22 08:13:36.734173: step: 312/77, loss: 0.028605114668607712 2023-01-22 08:13:38.108552: step: 316/77, loss: 0.005716841202229261 2023-01-22 08:13:39.477111: step: 320/77, loss: 0.002674217103049159 2023-01-22 08:13:40.842981: step: 324/77, loss: 0.019984597340226173 2023-01-22 08:13:42.180383: step: 328/77, loss: 0.04892556741833687 2023-01-22 08:13:43.531080: step: 332/77, loss: 0.04373926669359207 2023-01-22 08:13:44.853800: step: 336/77, loss: 0.017949160188436508 2023-01-22 08:13:46.156693: step: 340/77, loss: 0.009541014209389687 2023-01-22 08:13:47.464881: step: 344/77, loss: 0.020409800112247467 2023-01-22 08:13:48.782030: step: 348/77, loss: 0.00047758466098457575 2023-01-22 08:13:50.086164: step: 352/77, loss: 0.0053678578697144985 2023-01-22 08:13:51.386343: step: 356/77, loss: 0.005686678923666477 2023-01-22 08:13:52.725719: step: 360/77, loss: 0.011399084702134132 2023-01-22 08:13:54.067437: step: 364/77, loss: 0.002073788084089756 2023-01-22 08:13:55.351044: step: 368/77, loss: 0.0068801334127783775 2023-01-22 08:13:56.665606: step: 372/77, loss: 0.036192066967487335 2023-01-22 08:13:57.954669: step: 376/77, loss: 0.022756557911634445 2023-01-22 08:13:59.253274: step: 380/77, loss: 0.024984458461403847 2023-01-22 08:14:00.588422: step: 384/77, loss: 0.000745010154787451 2023-01-22 08:14:01.959221: step: 388/77, loss: 0.009983447380363941 ================================================== Loss: 0.022 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Chinese: {'template': {'p': 0.9821428571428571, 'r': 0.4296875, 'f1': 0.5978260869565216}, 'slot': {'p': 0.625, 'r': 0.009157509157509158, 'f1': 0.01805054151624549}, 'combined': 0.01079108460210328, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Korean: {'template': {'p': 0.9821428571428571, 'r': 0.4296875, 'f1': 0.5978260869565216}, 'slot': {'p': 0.625, 'r': 0.009157509157509158, 'f1': 0.01805054151624549}, 'combined': 0.01079108460210328, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Russian: {'template': {'p': 0.9833333333333333, 'r': 0.4609375, 'f1': 0.6276595744680851}, 'slot': {'p': 0.625, 'r': 
0.009157509157509158, 'f1': 0.01805054151624549}, 'combined': 0.011329595207005147, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 9 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:15:48.739439: step: 4/77, loss: 0.003912047017365694 2023-01-22 08:15:50.020936: step: 8/77, loss: 0.011383119970560074 2023-01-22 08:15:51.316403: step: 12/77, loss: 0.051056768745183945 2023-01-22 08:15:52.627239: step: 16/77, loss: 0.005373623222112656 2023-01-22 08:15:53.969969: step: 20/77, loss: 0.009975888766348362 2023-01-22 08:15:55.263063: step: 24/77, loss: 0.014537742361426353 2023-01-22 08:15:56.541672: step: 28/77, loss: 0.025953613221645355 2023-01-22 08:15:57.869709: step: 32/77, loss: 0.007894574664533138 2023-01-22 08:15:59.183234: step: 36/77, loss: 0.020549921318888664 2023-01-22 08:16:00.443168: step: 40/77, loss: 0.008908596821129322 2023-01-22 08:16:01.767706: step: 44/77, loss: 
0.001432577962987125 2023-01-22 08:16:03.029832: step: 48/77, loss: 0.02323325350880623 2023-01-22 08:16:04.331682: step: 52/77, loss: 0.01578327640891075 2023-01-22 08:16:05.632818: step: 56/77, loss: 0.000378149765310809 2023-01-22 08:16:06.957690: step: 60/77, loss: 0.010163774713873863 2023-01-22 08:16:08.262875: step: 64/77, loss: 0.032410383224487305 2023-01-22 08:16:09.532607: step: 68/77, loss: 0.00013186398427933455 2023-01-22 08:16:10.852297: step: 72/77, loss: 0.015592390671372414 2023-01-22 08:16:12.164299: step: 76/77, loss: 0.005880615673959255 2023-01-22 08:16:13.523683: step: 80/77, loss: 0.0011790425051003695 2023-01-22 08:16:14.873090: step: 84/77, loss: 0.03505949676036835 2023-01-22 08:16:16.194623: step: 88/77, loss: 0.017734795808792114 2023-01-22 08:16:17.500905: step: 92/77, loss: 0.002167154336348176 2023-01-22 08:16:18.783184: step: 96/77, loss: 0.07464326918125153 2023-01-22 08:16:20.080556: step: 100/77, loss: 0.06874995678663254 2023-01-22 08:16:21.367100: step: 104/77, loss: 0.00012492010137066245 2023-01-22 08:16:22.708031: step: 108/77, loss: 0.008151310496032238 2023-01-22 08:16:24.005763: step: 112/77, loss: 0.003479942213743925 2023-01-22 08:16:25.363861: step: 116/77, loss: 0.014868896454572678 2023-01-22 08:16:26.723352: step: 120/77, loss: 0.015556391328573227 2023-01-22 08:16:27.948843: step: 124/77, loss: 0.029294028878211975 2023-01-22 08:16:29.265255: step: 128/77, loss: 0.02460947260260582 2023-01-22 08:16:30.571975: step: 132/77, loss: 0.009721241891384125 2023-01-22 08:16:31.886312: step: 136/77, loss: 0.0009847991168498993 2023-01-22 08:16:33.155885: step: 140/77, loss: 0.010228264145553112 2023-01-22 08:16:34.453562: step: 144/77, loss: 0.022964006289839745 2023-01-22 08:16:35.788654: step: 148/77, loss: 0.040076129138469696 2023-01-22 08:16:37.082404: step: 152/77, loss: 0.007666187360882759 2023-01-22 08:16:38.364190: step: 156/77, loss: 0.056761860847473145 2023-01-22 08:16:39.671996: step: 160/77, loss: 0.0006916196434758604 2023-01-22 08:16:40.958330: step: 164/77, loss: 9.866947948466986e-05 2023-01-22 08:16:42.278968: step: 168/77, loss: 0.005803945939987898 2023-01-22 08:16:43.611247: step: 172/77, loss: 0.02239947021007538 2023-01-22 08:16:44.900868: step: 176/77, loss: 0.04482104629278183 2023-01-22 08:16:46.244117: step: 180/77, loss: 0.01325925998389721 2023-01-22 08:16:47.544736: step: 184/77, loss: 0.022729776799678802 2023-01-22 08:16:48.910708: step: 188/77, loss: 0.007351192645728588 2023-01-22 08:16:50.238147: step: 192/77, loss: 0.005582916084676981 2023-01-22 08:16:51.571813: step: 196/77, loss: 0.0008952165953814983 2023-01-22 08:16:52.844316: step: 200/77, loss: 0.013445856980979443 2023-01-22 08:16:54.094713: step: 204/77, loss: 0.09656139463186264 2023-01-22 08:16:55.400397: step: 208/77, loss: 0.019810235127806664 2023-01-22 08:16:56.730504: step: 212/77, loss: 0.01166454330086708 2023-01-22 08:16:58.059417: step: 216/77, loss: 0.07258975505828857 2023-01-22 08:16:59.314702: step: 220/77, loss: 0.0005941896233707666 2023-01-22 08:17:00.641533: step: 224/77, loss: 0.06122061610221863 2023-01-22 08:17:01.931301: step: 228/77, loss: 0.0010990884620696306 2023-01-22 08:17:03.175530: step: 232/77, loss: 0.021423693746328354 2023-01-22 08:17:04.531631: step: 236/77, loss: 0.12998279929161072 2023-01-22 08:17:05.867453: step: 240/77, loss: 0.023686395958065987 2023-01-22 08:17:07.223180: step: 244/77, loss: 0.012482582591474056 2023-01-22 08:17:08.512724: step: 248/77, loss: 0.005338149145245552 2023-01-22 08:17:09.819099: 
step: 252/77, loss: 0.00028367474442347884 2023-01-22 08:17:11.116061: step: 256/77, loss: 0.013595180585980415 2023-01-22 08:17:12.435174: step: 260/77, loss: 0.0002446919970680028 2023-01-22 08:17:13.772828: step: 264/77, loss: 0.01875019259750843 2023-01-22 08:17:15.123883: step: 268/77, loss: 0.006992523558437824 2023-01-22 08:17:16.459389: step: 272/77, loss: 0.010164832696318626 2023-01-22 08:17:17.782855: step: 276/77, loss: 0.005523860454559326 2023-01-22 08:17:19.150370: step: 280/77, loss: 0.00024588676751591265 2023-01-22 08:17:20.478628: step: 284/77, loss: 0.002962901024147868 2023-01-22 08:17:21.799062: step: 288/77, loss: 0.001597255701199174 2023-01-22 08:17:23.151855: step: 292/77, loss: 0.00734774861484766 2023-01-22 08:17:24.455901: step: 296/77, loss: 0.0006774789653718472 2023-01-22 08:17:25.795868: step: 300/77, loss: 0.016190657392144203 2023-01-22 08:17:27.107587: step: 304/77, loss: 0.007922903634607792 2023-01-22 08:17:28.411898: step: 308/77, loss: 0.02948911488056183 2023-01-22 08:17:29.703234: step: 312/77, loss: 0.02065003663301468 2023-01-22 08:17:31.026133: step: 316/77, loss: 0.004155493341386318 2023-01-22 08:17:32.362313: step: 320/77, loss: 0.0011718124151229858 2023-01-22 08:17:33.663654: step: 324/77, loss: 0.0003155616286676377 2023-01-22 08:17:34.962780: step: 328/77, loss: 0.03292486071586609 2023-01-22 08:17:36.292754: step: 332/77, loss: 0.02266281470656395 2023-01-22 08:17:37.621319: step: 336/77, loss: 0.02113211527466774 2023-01-22 08:17:38.939545: step: 340/77, loss: 0.017704568803310394 2023-01-22 08:17:40.274184: step: 344/77, loss: 0.001365205505862832 2023-01-22 08:17:41.579176: step: 348/77, loss: 9.205719106830657e-05 2023-01-22 08:17:42.941574: step: 352/77, loss: 0.039498742669820786 2023-01-22 08:17:44.245333: step: 356/77, loss: 0.014530006796121597 2023-01-22 08:17:45.582967: step: 360/77, loss: 0.019658163189888 2023-01-22 08:17:46.909282: step: 364/77, loss: 0.01574498787522316 2023-01-22 08:17:48.225154: step: 368/77, loss: 0.025630172342061996 2023-01-22 08:17:49.555569: step: 372/77, loss: 0.00019986522966064513 2023-01-22 08:17:50.926026: step: 376/77, loss: 0.0015250144060701132 2023-01-22 08:17:52.307494: step: 380/77, loss: 0.04629107564687729 2023-01-22 08:17:53.610030: step: 384/77, loss: 0.00029715109849348664 2023-01-22 08:17:54.925463: step: 388/77, loss: 0.028631635010242462 ================================================== Loss: 0.018 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Chinese: {'template': {'p': 0.9285714285714286, 'r': 0.5078125, 'f1': 0.6565656565656566}, 'slot': {'p': 0.5555555555555556, 'r': 0.013736263736263736, 'f1': 0.026809651474530835}, 'combined': 0.017602296422671762, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Korean: {'template': {'p': 0.9305555555555556, 'r': 0.5234375, 'f1': 0.6699999999999999}, 'slot': {'p': 0.5555555555555556, 'r': 0.013736263736263736, 'f1': 0.026809651474530835}, 'combined': 0.017962466487935657, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 
9} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.515625, 'f1': 0.6633165829145728}, 'slot': {'p': 0.5555555555555556, 'r': 0.013736263736263736, 'f1': 0.026809651474530835}, 'combined': 0.017783286405216432, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 10 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:19:41.519505: step: 4/77, loss: 0.04615463688969612 2023-01-22 08:19:42.885578: step: 8/77, loss: 0.009117464534938335 2023-01-22 08:19:44.186399: step: 12/77, loss: 0.003044185694307089 2023-01-22 08:19:45.505305: step: 16/77, loss: 0.007869887165725231 2023-01-22 08:19:46.878865: step: 20/77, loss: 0.03679301217198372 2023-01-22 08:19:48.171418: step: 24/77, loss: 0.0019312759395688772 2023-01-22 08:19:49.473970: step: 28/77, loss: 0.010774192400276661 2023-01-22 08:19:50.782976: step: 32/77, loss: 0.029460368677973747 2023-01-22 08:19:52.119840: step: 36/77, loss: 
0.0028629382140934467 2023-01-22 08:19:53.443746: step: 40/77, loss: 0.0027221275959163904 2023-01-22 08:19:54.762459: step: 44/77, loss: 0.004545790143311024 2023-01-22 08:19:56.050263: step: 48/77, loss: 0.023755038157105446 2023-01-22 08:19:57.383876: step: 52/77, loss: 2.516713266231818e-06 2023-01-22 08:19:58.729866: step: 56/77, loss: 0.0995924100279808 2023-01-22 08:20:00.028428: step: 60/77, loss: 0.011077972128987312 2023-01-22 08:20:01.294979: step: 64/77, loss: 0.002754708519205451 2023-01-22 08:20:02.586490: step: 68/77, loss: 0.056307896971702576 2023-01-22 08:20:03.862923: step: 72/77, loss: 0.0011351814027875662 2023-01-22 08:20:05.213909: step: 76/77, loss: 0.005272203125059605 2023-01-22 08:20:06.509835: step: 80/77, loss: 0.04300892353057861 2023-01-22 08:20:07.811634: step: 84/77, loss: 0.0004559697408694774 2023-01-22 08:20:09.141348: step: 88/77, loss: 0.003254904178902507 2023-01-22 08:20:10.465514: step: 92/77, loss: 0.052556414157152176 2023-01-22 08:20:11.811046: step: 96/77, loss: 0.00029989806353114545 2023-01-22 08:20:13.101427: step: 100/77, loss: 0.020337561145424843 2023-01-22 08:20:14.393583: step: 104/77, loss: 0.0005727419047616422 2023-01-22 08:20:15.742069: step: 108/77, loss: 0.004704848863184452 2023-01-22 08:20:17.077646: step: 112/77, loss: 0.007665609009563923 2023-01-22 08:20:18.339185: step: 116/77, loss: 4.7921581426635385e-05 2023-01-22 08:20:19.670433: step: 120/77, loss: 0.031694717705249786 2023-01-22 08:20:20.965963: step: 124/77, loss: 0.00014315726002678275 2023-01-22 08:20:22.311273: step: 128/77, loss: 0.015583023428916931 2023-01-22 08:20:23.628000: step: 132/77, loss: 0.02313140407204628 2023-01-22 08:20:24.949314: step: 136/77, loss: 0.03468454256653786 2023-01-22 08:20:26.289034: step: 140/77, loss: 0.002859762404114008 2023-01-22 08:20:27.628875: step: 144/77, loss: 0.00634431466460228 2023-01-22 08:20:28.991292: step: 148/77, loss: 0.005873150657862425 2023-01-22 08:20:30.339279: step: 152/77, loss: 0.0020783240906894207 2023-01-22 08:20:31.633971: step: 156/77, loss: 0.004159911070019007 2023-01-22 08:20:32.935586: step: 160/77, loss: 0.0023721123579889536 2023-01-22 08:20:34.223922: step: 164/77, loss: 0.003274687333032489 2023-01-22 08:20:35.527801: step: 168/77, loss: 0.005437308922410011 2023-01-22 08:20:36.840658: step: 172/77, loss: 0.0010760590666905046 2023-01-22 08:20:38.205943: step: 176/77, loss: 8.248248195741326e-05 2023-01-22 08:20:39.529306: step: 180/77, loss: 0.00022509020345751196 2023-01-22 08:20:40.844098: step: 184/77, loss: 0.11335831880569458 2023-01-22 08:20:42.189313: step: 188/77, loss: 0.045995332300662994 2023-01-22 08:20:43.498309: step: 192/77, loss: 1.6271269487333484e-06 2023-01-22 08:20:44.824849: step: 196/77, loss: 0.010354535654187202 2023-01-22 08:20:46.161524: step: 200/77, loss: 0.0004379808669909835 2023-01-22 08:20:47.489499: step: 204/77, loss: 0.00743110803887248 2023-01-22 08:20:48.808647: step: 208/77, loss: 0.042238425463438034 2023-01-22 08:20:50.175506: step: 212/77, loss: 0.009544878266751766 2023-01-22 08:20:51.491191: step: 216/77, loss: 0.0026782380882650614 2023-01-22 08:20:52.810505: step: 220/77, loss: 0.006199941039085388 2023-01-22 08:20:54.140097: step: 224/77, loss: 0.016368556767702103 2023-01-22 08:20:55.471616: step: 228/77, loss: 0.001818418619222939 2023-01-22 08:20:56.827230: step: 232/77, loss: 0.0004000907065346837 2023-01-22 08:20:58.213503: step: 236/77, loss: 0.019138023257255554 2023-01-22 08:20:59.533904: step: 240/77, loss: 0.007999700494110584 2023-01-22 
08:21:00.845455: step: 244/77, loss: 0.001023939112201333 2023-01-22 08:21:02.153670: step: 248/77, loss: 0.016954539343714714 2023-01-22 08:21:03.469319: step: 252/77, loss: 0.006783746648579836 2023-01-22 08:21:04.847606: step: 256/77, loss: 0.006279125344008207 2023-01-22 08:21:06.147350: step: 260/77, loss: 0.013124539516866207 2023-01-22 08:21:07.542813: step: 264/77, loss: 0.02867840602993965 2023-01-22 08:21:08.881117: step: 268/77, loss: 0.06440456211566925 2023-01-22 08:21:10.164263: step: 272/77, loss: 0.0008166446350514889 2023-01-22 08:21:11.520431: step: 276/77, loss: 0.02754484862089157 2023-01-22 08:21:12.897913: step: 280/77, loss: 0.05714397504925728 2023-01-22 08:21:14.228942: step: 284/77, loss: 0.05230758339166641 2023-01-22 08:21:15.533805: step: 288/77, loss: 0.043070804327726364 2023-01-22 08:21:16.871374: step: 292/77, loss: 0.002076654462143779 2023-01-22 08:21:18.177439: step: 296/77, loss: 0.009285686537623405 2023-01-22 08:21:19.509834: step: 300/77, loss: 0.0024392385967075825 2023-01-22 08:21:20.857076: step: 304/77, loss: 0.057006798684597015 2023-01-22 08:21:22.148437: step: 308/77, loss: 0.006346026435494423 2023-01-22 08:21:23.519739: step: 312/77, loss: 0.015370495617389679 2023-01-22 08:21:24.816535: step: 316/77, loss: 0.015006231144070625 2023-01-22 08:21:26.139817: step: 320/77, loss: 0.0007069883868098259 2023-01-22 08:21:27.525092: step: 324/77, loss: 0.046590473502874374 2023-01-22 08:21:28.831509: step: 328/77, loss: 0.021039793267846107 2023-01-22 08:21:30.181008: step: 332/77, loss: 0.0467706099152565 2023-01-22 08:21:31.451280: step: 336/77, loss: 0.0013578898506239057 2023-01-22 08:21:32.756002: step: 340/77, loss: 0.0328650176525116 2023-01-22 08:21:34.074461: step: 344/77, loss: 0.00026520888786762953 2023-01-22 08:21:35.414761: step: 348/77, loss: 0.012248726561665535 2023-01-22 08:21:36.759940: step: 352/77, loss: 0.07332906872034073 2023-01-22 08:21:38.125418: step: 356/77, loss: 0.01326354593038559 2023-01-22 08:21:39.447003: step: 360/77, loss: 0.012125191278755665 2023-01-22 08:21:40.802074: step: 364/77, loss: 0.01191934198141098 2023-01-22 08:21:42.177014: step: 368/77, loss: 6.407341425074264e-05 2023-01-22 08:21:43.513635: step: 372/77, loss: 0.01194518432021141 2023-01-22 08:21:44.793864: step: 376/77, loss: 0.032971225678920746 2023-01-22 08:21:46.085545: step: 380/77, loss: 0.020709317177534103 2023-01-22 08:21:47.415122: step: 384/77, loss: 0.00015790096949785948 2023-01-22 08:21:48.777382: step: 388/77, loss: 0.0025844480842351913 ================================================== Loss: 0.017 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Chinese: {'template': {'p': 0.9836065573770492, 'r': 0.46875, 'f1': 0.6349206349206349}, 'slot': {'p': 0.6818181818181818, 'r': 0.013736263736263736, 'f1': 0.026929982046678635}, 'combined': 0.017098401299478497, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Korean: {'template': {'p': 0.967741935483871, 'r': 0.46875, 'f1': 0.631578947368421}, 'slot': {'p': 0.6521739130434783, 'r': 0.013736263736263736, 'f1': 0.026905829596412557}, 'combined': 0.01699315553457635, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 
0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Russian: {'template': {'p': 0.9833333333333333, 'r': 0.4609375, 'f1': 0.6276595744680851}, 'slot': {'p': 0.6818181818181818, 'r': 0.013736263736263736, 'f1': 0.026929982046678635}, 'combined': 0.01690286107185148, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 11 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:23:35.578449: step: 4/77, loss: 0.00036224426003172994 2023-01-22 08:23:36.838591: step: 8/77, loss: 0.00040116641321219504 2023-01-22 08:23:38.103556: step: 12/77, loss: 0.006214071996510029 2023-01-22 08:23:39.455876: step: 16/77, loss: 0.024180032312870026 2023-01-22 08:23:40.793053: step: 20/77, loss: 0.023085253313183784 2023-01-22 08:23:42.166700: step: 24/77, loss: 0.020584464073181152 2023-01-22 08:23:43.456477: 
step: 28/77, loss: 0.00010433314309921116 2023-01-22 08:23:44.762390: step: 32/77, loss: 0.027797989547252655 2023-01-22 08:23:46.124178: step: 36/77, loss: 0.013356706127524376 2023-01-22 08:23:47.437694: step: 40/77, loss: 0.054990656673908234 2023-01-22 08:23:48.732617: step: 44/77, loss: 0.002558821579441428 2023-01-22 08:23:50.045411: step: 48/77, loss: 0.010729584842920303 2023-01-22 08:23:51.325969: step: 52/77, loss: 0.0019385579507797956 2023-01-22 08:23:52.628683: step: 56/77, loss: 0.021367311477661133 2023-01-22 08:23:53.961320: step: 60/77, loss: 0.00020064935961272568 2023-01-22 08:23:55.317680: step: 64/77, loss: 0.09076997637748718 2023-01-22 08:23:56.642549: step: 68/77, loss: 0.008457686752080917 2023-01-22 08:23:57.971817: step: 72/77, loss: 0.011427009478211403 2023-01-22 08:23:59.247572: step: 76/77, loss: 6.459790893131867e-05 2023-01-22 08:24:00.588411: step: 80/77, loss: 0.0007144349510781467 2023-01-22 08:24:01.883285: step: 84/77, loss: 0.006785162724554539 2023-01-22 08:24:03.208799: step: 88/77, loss: 0.004394386429339647 2023-01-22 08:24:04.519869: step: 92/77, loss: 0.004328886978328228 2023-01-22 08:24:05.852065: step: 96/77, loss: 2.6869380235439166e-05 2023-01-22 08:24:07.195228: step: 100/77, loss: 0.03273552283644676 2023-01-22 08:24:08.515634: step: 104/77, loss: 0.004109046421945095 2023-01-22 08:24:09.776355: step: 108/77, loss: 0.0020677947904914618 2023-01-22 08:24:11.081208: step: 112/77, loss: 0.04142068699002266 2023-01-22 08:24:12.429065: step: 116/77, loss: 0.012463448569178581 2023-01-22 08:24:13.760698: step: 120/77, loss: 0.032798901200294495 2023-01-22 08:24:15.090163: step: 124/77, loss: 0.00578728411346674 2023-01-22 08:24:16.355324: step: 128/77, loss: 0.0483129508793354 2023-01-22 08:24:17.680421: step: 132/77, loss: 0.010041739791631699 2023-01-22 08:24:18.967030: step: 136/77, loss: 0.003338428447023034 2023-01-22 08:24:20.301009: step: 140/77, loss: 0.001665607444010675 2023-01-22 08:24:21.673443: step: 144/77, loss: 0.00014822985394857824 2023-01-22 08:24:23.029836: step: 148/77, loss: 0.019720718264579773 2023-01-22 08:24:24.322752: step: 152/77, loss: 0.0004685567400883883 2023-01-22 08:24:25.678606: step: 156/77, loss: 0.006013353355228901 2023-01-22 08:24:27.007338: step: 160/77, loss: 0.019151905551552773 2023-01-22 08:24:28.287286: step: 164/77, loss: 0.02454826608300209 2023-01-22 08:24:29.602048: step: 168/77, loss: 0.01621721312403679 2023-01-22 08:24:30.906387: step: 172/77, loss: 0.08445467799901962 2023-01-22 08:24:32.192585: step: 176/77, loss: 0.047589078545570374 2023-01-22 08:24:33.486036: step: 180/77, loss: 0.011298105120658875 2023-01-22 08:24:34.842634: step: 184/77, loss: 0.0011730461847037077 2023-01-22 08:24:36.195788: step: 188/77, loss: 0.017566129565238953 2023-01-22 08:24:37.476987: step: 192/77, loss: 0.009225753135979176 2023-01-22 08:24:38.829303: step: 196/77, loss: 0.008841684088110924 2023-01-22 08:24:40.101666: step: 200/77, loss: 0.0313703790307045 2023-01-22 08:24:41.399117: step: 204/77, loss: 0.018745075911283493 2023-01-22 08:24:42.709911: step: 208/77, loss: 0.016669992357492447 2023-01-22 08:24:44.044647: step: 212/77, loss: 0.11202440410852432 2023-01-22 08:24:45.378139: step: 216/77, loss: 0.004480287898331881 2023-01-22 08:24:46.667480: step: 220/77, loss: 0.056660331785678864 2023-01-22 08:24:48.040883: step: 224/77, loss: 0.002400397788733244 2023-01-22 08:24:49.335281: step: 228/77, loss: 0.00011129678750876337 2023-01-22 08:24:50.677186: step: 232/77, loss: 0.00022100968635641038 
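The 'Current best result' blocks keep pointing at epoch 4 because no later epoch has produced a strictly higher dev 'combined' score than the epoch-4 value of 0.05179909351586346: epochs 8-10 only tie it and epoch 7 falls below it. The log is therefore consistent with dev-based selection under a strictly-greater comparison; the sketch below illustrates that behaviour with illustrative names and is not the actual selection code in train.py.

# Seeded with the epoch-4 result the log reports as the current best.
best = {"epoch": 4, "dev_combined": 0.05179909351586346}

def update_best(epoch: int, dev_combined: float) -> None:
    """Replace the tracked best only on a strict improvement of dev 'combined'."""
    if dev_combined > best["dev_combined"]:
        best["epoch"], best["dev_combined"] = epoch, dev_combined

# Dev Chinese 'combined' scores reported above for epochs 7-10:
for epoch, dev in [(7, 0.048482119404105226), (8, 0.05179909351586346),
                   (9, 0.05179909351586346), (10, 0.05179909351586346)]:
    update_best(epoch, dev)

print(best)  # {'epoch': 4, ...} -- ties do not displace the earlier epoch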
2023-01-22 08:24:51.995078: step: 236/77, loss: 0.006116272881627083 2023-01-22 08:24:53.358296: step: 240/77, loss: 0.0031299712136387825 2023-01-22 08:24:54.685548: step: 244/77, loss: 0.05327416956424713 2023-01-22 08:24:55.978058: step: 248/77, loss: 0.011465567164123058 2023-01-22 08:24:57.261671: step: 252/77, loss: 0.0021157297305762768 2023-01-22 08:24:58.619685: step: 256/77, loss: 0.0025393886025995016 2023-01-22 08:24:59.901926: step: 260/77, loss: 0.005280718207359314 2023-01-22 08:25:01.210569: step: 264/77, loss: 0.00016338759451173246 2023-01-22 08:25:02.537114: step: 268/77, loss: 0.030775373801589012 2023-01-22 08:25:03.819043: step: 272/77, loss: 8.42304652906023e-05 2023-01-22 08:25:05.113750: step: 276/77, loss: 0.020594937726855278 2023-01-22 08:25:06.399932: step: 280/77, loss: 0.0014261136529967189 2023-01-22 08:25:07.736497: step: 284/77, loss: 0.005536660086363554 2023-01-22 08:25:09.067661: step: 288/77, loss: 0.00045688700629398227 2023-01-22 08:25:10.401790: step: 292/77, loss: 0.009080913849174976 2023-01-22 08:25:11.715899: step: 296/77, loss: 0.00039632292464375496 2023-01-22 08:25:13.078393: step: 300/77, loss: 0.10056258738040924 2023-01-22 08:25:14.391070: step: 304/77, loss: 9.19805188459577e-06 2023-01-22 08:25:15.683957: step: 308/77, loss: 0.03023666888475418 2023-01-22 08:25:16.970695: step: 312/77, loss: 0.08584782481193542 2023-01-22 08:25:18.257766: step: 316/77, loss: 0.009489987976849079 2023-01-22 08:25:19.620522: step: 320/77, loss: 0.011331534944474697 2023-01-22 08:25:20.930760: step: 324/77, loss: 0.013444948941469193 2023-01-22 08:25:22.205687: step: 328/77, loss: 0.014004156924784184 2023-01-22 08:25:23.535172: step: 332/77, loss: 0.0021559440065175295 2023-01-22 08:25:24.823764: step: 336/77, loss: 0.023692995309829712 2023-01-22 08:25:26.083618: step: 340/77, loss: 0.04169423505663872 2023-01-22 08:25:27.384475: step: 344/77, loss: 0.004395222757011652 2023-01-22 08:25:28.736281: step: 348/77, loss: 0.0006789501057937741 2023-01-22 08:25:30.018570: step: 352/77, loss: 0.002909147646278143 2023-01-22 08:25:31.378072: step: 356/77, loss: 0.008771540597081184 2023-01-22 08:25:32.687605: step: 360/77, loss: 0.003884167643263936 2023-01-22 08:25:33.989917: step: 364/77, loss: 0.003442424349486828 2023-01-22 08:25:35.311471: step: 368/77, loss: 0.00011853533214889467 2023-01-22 08:25:36.600862: step: 372/77, loss: 0.00014925809227861464 2023-01-22 08:25:37.920861: step: 376/77, loss: 0.030259141698479652 2023-01-22 08:25:39.256192: step: 380/77, loss: 0.007407361175864935 2023-01-22 08:25:40.543374: step: 384/77, loss: 0.04872460663318634 2023-01-22 08:25:41.829741: step: 388/77, loss: 0.004564455710351467 ================================================== Loss: 0.017 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Chinese: {'template': {'p': 0.9666666666666667, 'r': 0.453125, 'f1': 0.6170212765957447}, 'slot': {'p': 0.5454545454545454, 'r': 0.01098901098901099, 'f1': 0.021543985637342913}, 'combined': 0.013293097520913712, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Korean: {'template': {'p': 0.9661016949152542, 'r': 0.4453125, 'f1': 0.6096256684491977}, 'slot': {'p': 
0.5454545454545454, 'r': 0.01098901098901099, 'f1': 0.021543985637342913}, 'combined': 0.013133766645225088, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Russian: {'template': {'p': 0.9830508474576272, 'r': 0.453125, 'f1': 0.6203208556149733}, 'slot': {'p': 0.6, 'r': 0.01098901098901099, 'f1': 0.02158273381294964}, 'combined': 0.013388219905359136, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 12 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:27:28.437257: step: 4/77, loss: 6.716536154272035e-05 2023-01-22 08:27:29.785571: step: 8/77, loss: 0.14508292078971863 2023-01-22 08:27:31.100758: step: 12/77, loss: 0.0064772190526127815 2023-01-22 08:27:32.371119: step: 16/77, loss: 0.00020000609220005572 2023-01-22 
08:27:33.642491: step: 20/77, loss: 0.0016990758012980223 2023-01-22 08:27:34.961970: step: 24/77, loss: 0.04430164396762848 2023-01-22 08:27:36.281426: step: 28/77, loss: 0.02811635658144951 2023-01-22 08:27:37.626737: step: 32/77, loss: 0.0021595361176878214 2023-01-22 08:27:38.935550: step: 36/77, loss: 0.0029978426173329353 2023-01-22 08:27:40.219402: step: 40/77, loss: 0.006254137028008699 2023-01-22 08:27:41.578099: step: 44/77, loss: 0.006620858795940876 2023-01-22 08:27:42.861076: step: 48/77, loss: 0.03224121034145355 2023-01-22 08:27:44.169175: step: 52/77, loss: 0.0015977731673046947 2023-01-22 08:27:45.463352: step: 56/77, loss: 0.001971067627891898 2023-01-22 08:27:46.757252: step: 60/77, loss: 0.003533907700330019 2023-01-22 08:27:48.004637: step: 64/77, loss: 0.028958367183804512 2023-01-22 08:27:49.291826: step: 68/77, loss: 0.01046693790704012 2023-01-22 08:27:50.593757: step: 72/77, loss: 0.0007335816044360399 2023-01-22 08:27:51.875699: step: 76/77, loss: 0.01121562160551548 2023-01-22 08:27:53.210239: step: 80/77, loss: 0.01169790979474783 2023-01-22 08:27:54.536719: step: 84/77, loss: 0.0018057833658531308 2023-01-22 08:27:55.849019: step: 88/77, loss: 0.005374482832849026 2023-01-22 08:27:57.156891: step: 92/77, loss: 0.024878591299057007 2023-01-22 08:27:58.491334: step: 96/77, loss: 0.0021314818877726793 2023-01-22 08:27:59.775595: step: 100/77, loss: 0.0036977045238018036 2023-01-22 08:28:01.114057: step: 104/77, loss: 0.003813137300312519 2023-01-22 08:28:02.429304: step: 108/77, loss: 0.0020447312854230404 2023-01-22 08:28:03.768135: step: 112/77, loss: 0.057621728628873825 2023-01-22 08:28:05.143438: step: 116/77, loss: 0.002673014998435974 2023-01-22 08:28:06.470974: step: 120/77, loss: 0.06338480114936829 2023-01-22 08:28:07.816783: step: 124/77, loss: 0.02054738998413086 2023-01-22 08:28:09.110826: step: 128/77, loss: 0.018381303176283836 2023-01-22 08:28:10.449140: step: 132/77, loss: 0.01054394245147705 2023-01-22 08:28:11.738686: step: 136/77, loss: 0.00022755435202270746 2023-01-22 08:28:13.055960: step: 140/77, loss: 0.0012879862915724516 2023-01-22 08:28:14.423845: step: 144/77, loss: 0.03543628752231598 2023-01-22 08:28:15.716589: step: 148/77, loss: 0.034065961837768555 2023-01-22 08:28:17.036159: step: 152/77, loss: 0.0007926194812171161 2023-01-22 08:28:18.370767: step: 156/77, loss: 0.019189316779375076 2023-01-22 08:28:19.691789: step: 160/77, loss: 0.005916177295148373 2023-01-22 08:28:21.016580: step: 164/77, loss: 0.006899761967360973 2023-01-22 08:28:22.328690: step: 168/77, loss: 0.01533865462988615 2023-01-22 08:28:23.637150: step: 172/77, loss: 0.0008392666350118816 2023-01-22 08:28:24.942681: step: 176/77, loss: 0.0041745989583432674 2023-01-22 08:28:26.280899: step: 180/77, loss: 0.024267667904496193 2023-01-22 08:28:27.588522: step: 184/77, loss: 0.012002028524875641 2023-01-22 08:28:28.889514: step: 188/77, loss: 0.000731311272829771 2023-01-22 08:28:30.228691: step: 192/77, loss: 0.0017883798573166132 2023-01-22 08:28:31.560225: step: 196/77, loss: 0.006359480787068605 2023-01-22 08:28:32.898849: step: 200/77, loss: 0.0010414841817691922 2023-01-22 08:28:34.236300: step: 204/77, loss: 0.004932164680212736 2023-01-22 08:28:35.556800: step: 208/77, loss: 0.011990321800112724 2023-01-22 08:28:36.867716: step: 212/77, loss: 0.05553290247917175 2023-01-22 08:28:38.238516: step: 216/77, loss: 0.0007119431393221021 2023-01-22 08:28:39.562593: step: 220/77, loss: 1.3023453675486962e-06 2023-01-22 08:28:40.907241: step: 224/77, loss: 
0.004885970614850521 2023-01-22 08:28:42.199393: step: 228/77, loss: 0.00034167556441389024 2023-01-22 08:28:43.576782: step: 232/77, loss: 0.03612995520234108 2023-01-22 08:28:44.884639: step: 236/77, loss: 0.00044193086796440184 2023-01-22 08:28:46.229040: step: 240/77, loss: 0.00014060882676858455 2023-01-22 08:28:47.514121: step: 244/77, loss: 0.012359784916043282 2023-01-22 08:28:48.834045: step: 248/77, loss: 8.537257235730067e-05 2023-01-22 08:28:50.142228: step: 252/77, loss: 0.0018495420226827264 2023-01-22 08:28:51.496426: step: 256/77, loss: 0.012446880340576172 2023-01-22 08:28:52.785880: step: 260/77, loss: 0.00018789272871799767 2023-01-22 08:28:54.089363: step: 264/77, loss: 0.036935579031705856 2023-01-22 08:28:55.390901: step: 268/77, loss: 0.004805781878530979 2023-01-22 08:28:56.731066: step: 272/77, loss: 0.051274560391902924 2023-01-22 08:28:58.068410: step: 276/77, loss: 0.005366981960833073 2023-01-22 08:28:59.387549: step: 280/77, loss: 0.011775230057537556 2023-01-22 08:29:00.743984: step: 284/77, loss: 0.0029582062270492315 2023-01-22 08:29:02.071990: step: 288/77, loss: 0.007654991932213306 2023-01-22 08:29:03.366876: step: 292/77, loss: 0.012157164514064789 2023-01-22 08:29:04.657613: step: 296/77, loss: 0.011476476676762104 2023-01-22 08:29:05.999264: step: 300/77, loss: 0.0006907251081429422 2023-01-22 08:29:07.325394: step: 304/77, loss: 0.042078647762537 2023-01-22 08:29:08.623708: step: 308/77, loss: 0.0014963550493121147 2023-01-22 08:29:09.998468: step: 312/77, loss: 0.006883785128593445 2023-01-22 08:29:11.262617: step: 316/77, loss: 0.01745801977813244 2023-01-22 08:29:12.531199: step: 320/77, loss: 0.008958478458225727 2023-01-22 08:29:13.856857: step: 324/77, loss: 0.01516663283109665 2023-01-22 08:29:15.155837: step: 328/77, loss: 0.0009179931366816163 2023-01-22 08:29:16.513853: step: 332/77, loss: 0.0005924751749262214 2023-01-22 08:29:17.851477: step: 336/77, loss: 0.028950592502951622 2023-01-22 08:29:19.167349: step: 340/77, loss: 0.010792708955705166 2023-01-22 08:29:20.450597: step: 344/77, loss: 0.0005969545454718173 2023-01-22 08:29:21.775367: step: 348/77, loss: 0.0005601674783974886 2023-01-22 08:29:23.064589: step: 352/77, loss: 0.0011363797821104527 2023-01-22 08:29:24.390231: step: 356/77, loss: 0.027163442224264145 2023-01-22 08:29:25.710544: step: 360/77, loss: 0.011760690249502659 2023-01-22 08:29:27.047941: step: 364/77, loss: 0.0001635812222957611 2023-01-22 08:29:28.348106: step: 368/77, loss: 0.03414410725235939 2023-01-22 08:29:29.692309: step: 372/77, loss: 0.007519087288528681 2023-01-22 08:29:31.002994: step: 376/77, loss: 2.949499867099803e-05 2023-01-22 08:29:32.321669: step: 380/77, loss: 0.03247775137424469 2023-01-22 08:29:33.644476: step: 384/77, loss: 0.005735212471336126 2023-01-22 08:29:34.955807: step: 388/77, loss: 0.007732848171144724 ================================================== Loss: 0.013 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 12} Test Chinese: {'template': {'p': 0.96875, 'r': 0.484375, 'f1': 0.6458333333333334}, 'slot': {'p': 0.68, 'r': 0.015567765567765568, 'f1': 0.030438675022381376}, 'combined': 0.01965831095195464, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 
0.048482119404105226, 'epoch': 12} Test Korean: {'template': {'p': 0.96875, 'r': 0.484375, 'f1': 0.6458333333333334}, 'slot': {'p': 0.68, 'r': 0.015567765567765568, 'f1': 0.030438675022381376}, 'combined': 0.01965831095195464, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 12} Test Russian: {'template': {'p': 0.96875, 'r': 0.484375, 'f1': 0.6458333333333334}, 'slot': {'p': 0.68, 'r': 0.015567765567765568, 'f1': 0.030438675022381376}, 'combined': 0.01965831095195464, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 13 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:31:21.692310: step: 4/77, loss: 0.009995035827159882 2023-01-22 08:31:22.980638: step: 8/77, loss: 0.00022118259221315384 2023-01-22 08:31:24.328883: step: 12/77, loss: 0.002348837675526738 2023-01-22 08:31:25.611331: step: 16/77, 
loss: 0.015405723825097084 2023-01-22 08:31:26.863284: step: 20/77, loss: 0.0051343501545488834 2023-01-22 08:31:28.172806: step: 24/77, loss: 0.006473531015217304 2023-01-22 08:31:29.489555: step: 28/77, loss: 0.054217107594013214 2023-01-22 08:31:30.838651: step: 32/77, loss: 1.1163228919031098e-05 2023-01-22 08:31:32.152128: step: 36/77, loss: 0.020336586982011795 2023-01-22 08:31:33.437558: step: 40/77, loss: 0.05220355466008186 2023-01-22 08:31:34.721949: step: 44/77, loss: 4.464634184841998e-05 2023-01-22 08:31:35.996496: step: 48/77, loss: 0.025243086740374565 2023-01-22 08:31:37.322843: step: 52/77, loss: 0.05261071026325226 2023-01-22 08:31:38.619921: step: 56/77, loss: 0.01674208790063858 2023-01-22 08:31:39.954025: step: 60/77, loss: 5.1860555686289445e-05 2023-01-22 08:31:41.250169: step: 64/77, loss: 0.00012300141679588705 2023-01-22 08:31:42.570190: step: 68/77, loss: 0.006085620261728764 2023-01-22 08:31:43.866085: step: 72/77, loss: 2.66003335127607e-05 2023-01-22 08:31:45.189733: step: 76/77, loss: 0.00011043615086236969 2023-01-22 08:31:46.455174: step: 80/77, loss: 0.0026714566629379988 2023-01-22 08:31:47.776777: step: 84/77, loss: 0.03106573037803173 2023-01-22 08:31:49.056960: step: 88/77, loss: 0.031264789402484894 2023-01-22 08:31:50.362424: step: 92/77, loss: 0.001054988824762404 2023-01-22 08:31:51.702125: step: 96/77, loss: 0.0007731158402748406 2023-01-22 08:31:53.029957: step: 100/77, loss: 0.0005604913458228111 2023-01-22 08:31:54.353020: step: 104/77, loss: 0.007275127340108156 2023-01-22 08:31:55.646299: step: 108/77, loss: 9.56045332713984e-05 2023-01-22 08:31:56.964836: step: 112/77, loss: 0.0005544309969991446 2023-01-22 08:31:58.307810: step: 116/77, loss: 0.02516276203095913 2023-01-22 08:31:59.630589: step: 120/77, loss: 0.0228984784334898 2023-01-22 08:32:00.921390: step: 124/77, loss: 0.005786824971437454 2023-01-22 08:32:02.236100: step: 128/77, loss: 0.0007653665379621089 2023-01-22 08:32:03.538098: step: 132/77, loss: 0.05804044008255005 2023-01-22 08:32:04.829304: step: 136/77, loss: 0.0004368829831946641 2023-01-22 08:32:06.165614: step: 140/77, loss: 0.01609526388347149 2023-01-22 08:32:07.459517: step: 144/77, loss: 0.009050913155078888 2023-01-22 08:32:08.771968: step: 148/77, loss: 0.004335467703640461 2023-01-22 08:32:10.079984: step: 152/77, loss: 0.00035594069049693644 2023-01-22 08:32:11.422100: step: 156/77, loss: 0.005551069974899292 2023-01-22 08:32:12.744106: step: 160/77, loss: 0.10360582917928696 2023-01-22 08:32:14.065136: step: 164/77, loss: 0.006701688282191753 2023-01-22 08:32:15.360931: step: 168/77, loss: 0.00022893882123753428 2023-01-22 08:32:16.649136: step: 172/77, loss: 0.02077857404947281 2023-01-22 08:32:17.986222: step: 176/77, loss: 0.015844902023673058 2023-01-22 08:32:19.245998: step: 180/77, loss: 0.04306226223707199 2023-01-22 08:32:20.576490: step: 184/77, loss: 0.04048726707696915 2023-01-22 08:32:21.816318: step: 188/77, loss: 0.004294191021472216 2023-01-22 08:32:23.114643: step: 192/77, loss: 0.003412810154259205 2023-01-22 08:32:24.397248: step: 196/77, loss: 0.014952167868614197 2023-01-22 08:32:25.716938: step: 200/77, loss: 0.00016821689496282488 2023-01-22 08:32:27.056634: step: 204/77, loss: 0.003225933061912656 2023-01-22 08:32:28.369845: step: 208/77, loss: 0.030794963240623474 2023-01-22 08:32:29.704466: step: 212/77, loss: 0.0027160472236573696 2023-01-22 08:32:31.009649: step: 216/77, loss: 0.0658402293920517 2023-01-22 08:32:32.291382: step: 220/77, loss: 0.0005345541285350919 2023-01-22 
08:32:33.601767: step: 224/77, loss: 0.0006551474798470736 2023-01-22 08:32:34.908358: step: 228/77, loss: 0.0009930375963449478 2023-01-22 08:32:36.222351: step: 232/77, loss: 0.00024232860596384853 2023-01-22 08:32:37.560591: step: 236/77, loss: 0.0362166166305542 2023-01-22 08:32:38.883973: step: 240/77, loss: 0.037811048328876495 2023-01-22 08:32:40.217178: step: 244/77, loss: 0.04655441641807556 2023-01-22 08:32:41.533418: step: 248/77, loss: 0.006816321052610874 2023-01-22 08:32:42.918081: step: 252/77, loss: 0.0006618615007027984 2023-01-22 08:32:44.274820: step: 256/77, loss: 0.047126851975917816 2023-01-22 08:32:45.567794: step: 260/77, loss: 0.0028675626963377 2023-01-22 08:32:46.921288: step: 264/77, loss: 0.006751200184226036 2023-01-22 08:32:48.275152: step: 268/77, loss: 0.029957851395010948 2023-01-22 08:32:49.584943: step: 272/77, loss: 0.009517940692603588 2023-01-22 08:32:50.905340: step: 276/77, loss: 0.00022375909611582756 2023-01-22 08:32:52.206493: step: 280/77, loss: 0.0030457484535872936 2023-01-22 08:32:53.493034: step: 284/77, loss: 0.006273590959608555 2023-01-22 08:32:54.846101: step: 288/77, loss: 0.0011199575383216143 2023-01-22 08:32:56.139402: step: 292/77, loss: 0.0001741000305628404 2023-01-22 08:32:57.428915: step: 296/77, loss: 0.0022353699896484613 2023-01-22 08:32:58.688434: step: 300/77, loss: 0.0030100643634796143 2023-01-22 08:32:59.995971: step: 304/77, loss: 0.08724191784858704 2023-01-22 08:33:01.264149: step: 308/77, loss: 0.004720780998468399 2023-01-22 08:33:02.570598: step: 312/77, loss: 0.00046639557695016265 2023-01-22 08:33:03.855607: step: 316/77, loss: 0.00041480723302811384 2023-01-22 08:33:05.213859: step: 320/77, loss: 0.01986285112798214 2023-01-22 08:33:06.533128: step: 324/77, loss: 0.026395024731755257 2023-01-22 08:33:07.848406: step: 328/77, loss: 0.036015816032886505 2023-01-22 08:33:09.186936: step: 332/77, loss: 0.0005104574374854565 2023-01-22 08:33:10.477918: step: 336/77, loss: 0.010636835359036922 2023-01-22 08:33:11.718677: step: 340/77, loss: 0.018415292724967003 2023-01-22 08:33:13.016723: step: 344/77, loss: 0.0005754455924034119 2023-01-22 08:33:14.357660: step: 348/77, loss: 0.0002713290450628847 2023-01-22 08:33:15.705662: step: 352/77, loss: 3.4492208214942366e-05 2023-01-22 08:33:17.032702: step: 356/77, loss: 0.04332399368286133 2023-01-22 08:33:18.314759: step: 360/77, loss: 0.021294377744197845 2023-01-22 08:33:19.677020: step: 364/77, loss: 0.0004992512986063957 2023-01-22 08:33:20.993100: step: 368/77, loss: 0.0013733096420764923 2023-01-22 08:33:22.310514: step: 372/77, loss: 0.0025527013931423426 2023-01-22 08:33:23.655235: step: 376/77, loss: 2.774977110675536e-05 2023-01-22 08:33:25.008217: step: 380/77, loss: 0.0006133266724646091 2023-01-22 08:33:26.335801: step: 384/77, loss: 0.011961428448557854 2023-01-22 08:33:27.635893: step: 388/77, loss: 0.0016036704182624817 ================================================== Loss: 0.014 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.546875, 'f1': 0.7}, 'slot': {'p': 0.6129032258064516, 'r': 0.0173992673992674, 'f1': 0.033837934105075684}, 'combined': 0.02368655387355298, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 
0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Korean: {'template': {'p': 0.9710144927536232, 'r': 0.5234375, 'f1': 0.6802030456852792}, 'slot': {'p': 0.6333333333333333, 'r': 0.0173992673992674, 'f1': 0.0338680926916221}, 'combined': 0.0230371798003927, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Russian: {'template': {'p': 0.9714285714285714, 'r': 0.53125, 'f1': 0.6868686868686867}, 'slot': {'p': 0.6071428571428571, 'r': 0.015567765567765568, 'f1': 0.030357142857142853}, 'combined': 0.020851370851370846, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 13} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 14 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:35:14.337538: step: 4/77, loss: 0.0004200860275886953 2023-01-22 08:35:15.590219: step: 
8/77, loss: 0.0004749511426780373 2023-01-22 08:35:16.874526: step: 12/77, loss: 0.01922205276787281 2023-01-22 08:35:18.147297: step: 16/77, loss: 0.0049668727442622185 2023-01-22 08:35:19.490143: step: 20/77, loss: 0.00548710860311985 2023-01-22 08:35:20.773501: step: 24/77, loss: 0.009446317330002785 2023-01-22 08:35:22.088276: step: 28/77, loss: 0.056420616805553436 2023-01-22 08:35:23.380051: step: 32/77, loss: 0.0008900321554392576 2023-01-22 08:35:24.748408: step: 36/77, loss: 0.010750795714557171 2023-01-22 08:35:26.086913: step: 40/77, loss: 0.03467971459031105 2023-01-22 08:35:27.395555: step: 44/77, loss: 1.3400214811554179e-05 2023-01-22 08:35:28.694868: step: 48/77, loss: 0.04462679103016853 2023-01-22 08:35:30.043685: step: 52/77, loss: 0.0011556717799976468 2023-01-22 08:35:31.330217: step: 56/77, loss: 0.004821427166461945 2023-01-22 08:35:32.583984: step: 60/77, loss: 0.0010934629244729877 2023-01-22 08:35:33.869020: step: 64/77, loss: 0.002542417263612151 2023-01-22 08:35:35.152993: step: 68/77, loss: 0.054598476737737656 2023-01-22 08:35:36.458635: step: 72/77, loss: 1.2503600373747759e-05 2023-01-22 08:35:37.741346: step: 76/77, loss: 0.01632210798561573 2023-01-22 08:35:39.083655: step: 80/77, loss: 0.0004365661588963121 2023-01-22 08:35:40.387319: step: 84/77, loss: 0.009737212210893631 2023-01-22 08:35:41.670127: step: 88/77, loss: 0.005879779811948538 2023-01-22 08:35:43.079342: step: 92/77, loss: 0.002779336180537939 2023-01-22 08:35:44.387462: step: 96/77, loss: 0.0002582361048553139 2023-01-22 08:35:45.725931: step: 100/77, loss: 0.003720135660842061 2023-01-22 08:35:47.014843: step: 104/77, loss: 0.025107156485319138 2023-01-22 08:35:48.292047: step: 108/77, loss: 0.015690365806221962 2023-01-22 08:35:49.625070: step: 112/77, loss: 0.0029986624140292406 2023-01-22 08:35:50.969106: step: 116/77, loss: 0.0048570032231509686 2023-01-22 08:35:52.311927: step: 120/77, loss: 0.00633400259539485 2023-01-22 08:35:53.616307: step: 124/77, loss: 0.005726975854486227 2023-01-22 08:35:54.896096: step: 128/77, loss: 0.0002349330607103184 2023-01-22 08:35:56.224521: step: 132/77, loss: 0.06281039118766785 2023-01-22 08:35:57.515381: step: 136/77, loss: 0.013547773472964764 2023-01-22 08:35:58.866378: step: 140/77, loss: 0.005808471702039242 2023-01-22 08:36:00.193283: step: 144/77, loss: 0.027540259063243866 2023-01-22 08:36:01.508259: step: 148/77, loss: 6.303073314484209e-05 2023-01-22 08:36:02.814926: step: 152/77, loss: 0.023322701454162598 2023-01-22 08:36:04.103843: step: 156/77, loss: 0.02620372734963894 2023-01-22 08:36:05.409630: step: 160/77, loss: 0.007477977313101292 2023-01-22 08:36:06.740927: step: 164/77, loss: 0.0016014814609661698 2023-01-22 08:36:08.055936: step: 168/77, loss: 0.02817235141992569 2023-01-22 08:36:09.367785: step: 172/77, loss: 0.0029439725913107395 2023-01-22 08:36:10.670949: step: 176/77, loss: 0.012270371429622173 2023-01-22 08:36:11.983994: step: 180/77, loss: 0.01205486711114645 2023-01-22 08:36:13.312072: step: 184/77, loss: 0.00012254132889211178 2023-01-22 08:36:14.610186: step: 188/77, loss: 0.002939145313575864 2023-01-22 08:36:15.905955: step: 192/77, loss: 5.6999630032805726e-05 2023-01-22 08:36:17.178652: step: 196/77, loss: 0.07307901233434677 2023-01-22 08:36:18.480243: step: 200/77, loss: 0.0009206432150676847 2023-01-22 08:36:19.759482: step: 204/77, loss: 0.00030497522675432265 2023-01-22 08:36:21.114032: step: 208/77, loss: 0.0005431080353446305 2023-01-22 08:36:22.391453: step: 212/77, loss: 0.00023502598924096674 
2023-01-22 08:36:23.732955: step: 216/77, loss: 0.00036915275268256664 2023-01-22 08:36:25.061618: step: 220/77, loss: 0.02349918708205223 2023-01-22 08:36:26.355467: step: 224/77, loss: 0.0042792558670043945 2023-01-22 08:36:27.650626: step: 228/77, loss: 0.0006924453191459179 2023-01-22 08:36:28.966785: step: 232/77, loss: 0.00022316054673865438 2023-01-22 08:36:30.322550: step: 236/77, loss: 0.008597703650593758 2023-01-22 08:36:31.652267: step: 240/77, loss: 0.0010497706243768334 2023-01-22 08:36:32.960506: step: 244/77, loss: 0.005961691495031118 2023-01-22 08:36:34.302488: step: 248/77, loss: 0.00014646831550635397 2023-01-22 08:36:35.630487: step: 252/77, loss: 0.06768776476383209 2023-01-22 08:36:36.979614: step: 256/77, loss: 0.0005682706250809133 2023-01-22 08:36:38.378254: step: 260/77, loss: 0.003363983705639839 2023-01-22 08:36:39.718494: step: 264/77, loss: 2.314460652996786e-05 2023-01-22 08:36:41.017489: step: 268/77, loss: 0.024550795555114746 2023-01-22 08:36:42.301889: step: 272/77, loss: 0.00848584808409214 2023-01-22 08:36:43.604246: step: 276/77, loss: 6.662925443379208e-05 2023-01-22 08:36:44.927329: step: 280/77, loss: 0.024089567363262177 2023-01-22 08:36:46.233570: step: 284/77, loss: 0.007724351715296507 2023-01-22 08:36:47.570350: step: 288/77, loss: 0.0013219267129898071 2023-01-22 08:36:48.877223: step: 292/77, loss: 0.015313958749175072 2023-01-22 08:36:50.245092: step: 296/77, loss: 0.014368615113198757 2023-01-22 08:36:51.558544: step: 300/77, loss: 0.04230882599949837 2023-01-22 08:36:52.908710: step: 304/77, loss: 0.0017491618636995554 2023-01-22 08:36:54.172796: step: 308/77, loss: 0.00044850510312244296 2023-01-22 08:36:55.471177: step: 312/77, loss: 0.001114065176807344 2023-01-22 08:36:56.819517: step: 316/77, loss: 0.00840886402875185 2023-01-22 08:36:58.184850: step: 320/77, loss: 0.006510584149509668 2023-01-22 08:36:59.486327: step: 324/77, loss: 0.0036016865633428097 2023-01-22 08:37:00.866836: step: 328/77, loss: 0.003660577815026045 2023-01-22 08:37:02.208497: step: 332/77, loss: 0.018879014998674393 2023-01-22 08:37:03.567519: step: 336/77, loss: 0.046105917543172836 2023-01-22 08:37:04.940302: step: 340/77, loss: 0.0011975467205047607 2023-01-22 08:37:06.240697: step: 344/77, loss: 0.0047290450893342495 2023-01-22 08:37:07.584310: step: 348/77, loss: 0.0072173988446593285 2023-01-22 08:37:08.904155: step: 352/77, loss: 0.021729473024606705 2023-01-22 08:37:10.203637: step: 356/77, loss: 0.001801485545001924 2023-01-22 08:37:11.540759: step: 360/77, loss: 0.004448288585990667 2023-01-22 08:37:12.877003: step: 364/77, loss: 0.018222050741314888 2023-01-22 08:37:14.180161: step: 368/77, loss: 0.0042145997285842896 2023-01-22 08:37:15.509079: step: 372/77, loss: 0.011724326759576797 2023-01-22 08:37:16.864609: step: 376/77, loss: 0.0015771070029586554 2023-01-22 08:37:18.258237: step: 380/77, loss: 0.000674558337777853 2023-01-22 08:37:19.611418: step: 384/77, loss: 0.009776659309864044 2023-01-22 08:37:20.941366: step: 388/77, loss: 0.0007911527063697577 ================================================== Loss: 0.011 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Chinese: {'template': {'p': 0.9538461538461539, 'r': 0.484375, 'f1': 0.6424870466321243}, 'slot': {'p': 0.53125, 'r': 0.015567765567765568, 'f1': 0.0302491103202847}, 'combined': 0.019434661552929028, 
'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Korean: {'template': {'p': 0.9682539682539683, 'r': 0.4765625, 'f1': 0.6387434554973821}, 'slot': {'p': 0.5666666666666667, 'r': 0.015567765567765568, 'f1': 0.030303030303030307}, 'combined': 0.01935586228779946, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Russian: {'template': {'p': 0.9682539682539683, 'r': 0.4765625, 'f1': 0.6387434554973821}, 'slot': {'p': 0.5666666666666667, 'r': 0.015567765567765568, 'f1': 0.030303030303030307}, 'combined': 0.01935586228779946, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 15 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 
--max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:39:07.559322: step: 4/77, loss: 0.010837767273187637 2023-01-22 08:39:08.871939: step: 8/77, loss: 0.0006001390866003931 2023-01-22 08:39:10.246176: step: 12/77, loss: 0.0002572540252003819 2023-01-22 08:39:11.584568: step: 16/77, loss: 0.0035696025006473064 2023-01-22 08:39:12.895861: step: 20/77, loss: 0.0007004659855738282 2023-01-22 08:39:14.168829: step: 24/77, loss: 0.0023328056558966637 2023-01-22 08:39:15.461873: step: 28/77, loss: 0.00011439307127147913 2023-01-22 08:39:16.803863: step: 32/77, loss: 0.005891487468034029 2023-01-22 08:39:18.046345: step: 36/77, loss: 0.00023794885782990605 2023-01-22 08:39:19.341671: step: 40/77, loss: 7.563876715721563e-05 2023-01-22 08:39:20.648037: step: 44/77, loss: 0.0006197237526066601 2023-01-22 08:39:22.018683: step: 48/77, loss: 0.00023501500254496932 2023-01-22 08:39:23.329659: step: 52/77, loss: 0.013790091499686241 2023-01-22 08:39:24.646275: step: 56/77, loss: 0.028487298637628555 2023-01-22 08:39:25.974990: step: 60/77, loss: 1.4275335161073599e-05 2023-01-22 08:39:27.224282: step: 64/77, loss: 1.5512880054302514e-05 2023-01-22 08:39:28.536196: step: 68/77, loss: 0.02364487014710903 2023-01-22 08:39:29.864960: step: 72/77, loss: 0.002446085214614868 2023-01-22 08:39:31.140251: step: 76/77, loss: 0.02038494497537613 2023-01-22 08:39:32.461393: step: 80/77, loss: 0.0563809759914875 2023-01-22 08:39:33.775958: step: 84/77, loss: 0.00593971973285079 2023-01-22 08:39:35.139802: step: 88/77, loss: 8.265776705229655e-05 2023-01-22 08:39:36.500073: step: 92/77, loss: 0.02026337757706642 2023-01-22 08:39:37.850316: step: 96/77, loss: 0.0005820993683300912 2023-01-22 08:39:39.151994: step: 100/77, loss: 1.4649514923803508e-05 2023-01-22 08:39:40.428987: step: 104/77, loss: 6.332544217002578e-06 2023-01-22 08:39:41.713861: step: 108/77, loss: 0.20827950537204742 2023-01-22 08:39:43.092317: step: 112/77, loss: 0.0017105141887441278 2023-01-22 08:39:44.403322: step: 116/77, loss: 0.0018553459085524082 2023-01-22 08:39:45.725230: step: 120/77, loss: 0.00021237392502371222 2023-01-22 08:39:47.046766: step: 124/77, loss: 2.5977966288337484e-05 2023-01-22 08:39:48.364393: step: 128/77, loss: 9.777469676919281e-05 2023-01-22 08:39:49.672541: step: 132/77, loss: 1.1249124327150639e-05 2023-01-22 08:39:50.992111: step: 136/77, loss: 5.5485616030637175e-06 2023-01-22 08:39:52.330257: step: 140/77, loss: 0.00467675132676959 2023-01-22 08:39:53.659520: step: 144/77, loss: 0.025886178016662598 2023-01-22 08:39:54.968925: step: 148/77, loss: 0.023800842463970184 2023-01-22 08:39:56.269442: step: 152/77, loss: 0.0014288400998339057 2023-01-22 08:39:57.562777: step: 156/77, loss: 0.002464765915647149 2023-01-22 08:39:58.919117: step: 160/77, loss: 0.00029669213108718395 2023-01-22 08:40:00.241930: step: 164/77, loss: 0.006217610090970993 2023-01-22 08:40:01.542765: step: 168/77, loss: 0.0005582318408414721 2023-01-22 08:40:02.848481: step: 172/77, loss: 0.004611868876963854 2023-01-22 08:40:04.177684: step: 176/77, loss: 0.004949961323291063 2023-01-22 08:40:05.477013: step: 180/77, loss: 0.021964555606245995 2023-01-22 08:40:06.779599: step: 184/77, loss: 0.0010839662281796336 2023-01-22 08:40:08.133968: step: 188/77, loss: 0.11259245872497559 2023-01-22 08:40:09.485639: step: 192/77, loss: 0.03340409696102142 2023-01-22 08:40:10.814782: step: 196/77, loss: 0.032646629959344864 2023-01-22 08:40:12.157567: step: 200/77, loss: 0.01984964869916439 2023-01-22 08:40:13.458320: step: 204/77, 
loss: 0.0004216528031975031 2023-01-22 08:40:14.769165: step: 208/77, loss: 0.0052557592280209064 2023-01-22 08:40:16.130952: step: 212/77, loss: 0.029595471918582916 2023-01-22 08:40:17.457674: step: 216/77, loss: 0.0032760680187493563 2023-01-22 08:40:18.750467: step: 220/77, loss: 0.019138285890221596 2023-01-22 08:40:20.030159: step: 224/77, loss: 0.02345498651266098 2023-01-22 08:40:21.377952: step: 228/77, loss: 0.0026856400072574615 2023-01-22 08:40:22.718431: step: 232/77, loss: 0.016967257484793663 2023-01-22 08:40:24.018241: step: 236/77, loss: 0.00011248209921177477 2023-01-22 08:40:25.339281: step: 240/77, loss: 0.0003898591094184667 2023-01-22 08:40:26.692018: step: 244/77, loss: 0.04799313098192215 2023-01-22 08:40:28.058564: step: 248/77, loss: 0.009683486074209213 2023-01-22 08:40:29.375663: step: 252/77, loss: 0.0008936700760386884 2023-01-22 08:40:30.685122: step: 256/77, loss: 0.00401071785017848 2023-01-22 08:40:31.999373: step: 260/77, loss: 0.011171936057507992 2023-01-22 08:40:33.279195: step: 264/77, loss: 5.710707409889437e-05 2023-01-22 08:40:34.614386: step: 268/77, loss: 0.02173326350748539 2023-01-22 08:40:35.963475: step: 272/77, loss: 0.00019250279001425952 2023-01-22 08:40:37.270580: step: 276/77, loss: 0.00044429523404687643 2023-01-22 08:40:38.625569: step: 280/77, loss: 0.015425225719809532 2023-01-22 08:40:39.986996: step: 284/77, loss: 1.926892036863137e-05 2023-01-22 08:40:41.228399: step: 288/77, loss: 0.006432825233787298 2023-01-22 08:40:42.558695: step: 292/77, loss: 0.00099134910851717 2023-01-22 08:40:43.842310: step: 296/77, loss: 7.043761434033513e-05 2023-01-22 08:40:45.146958: step: 300/77, loss: 0.0030199948232620955 2023-01-22 08:40:46.452366: step: 304/77, loss: 0.0016736382385715842 2023-01-22 08:40:47.781033: step: 308/77, loss: 0.014939763583242893 2023-01-22 08:40:49.106337: step: 312/77, loss: 7.201347762020305e-05 2023-01-22 08:40:50.394646: step: 316/77, loss: 0.03253569081425667 2023-01-22 08:40:51.766014: step: 320/77, loss: 0.04321937635540962 2023-01-22 08:40:53.069669: step: 324/77, loss: 0.056001633405685425 2023-01-22 08:40:54.395436: step: 328/77, loss: 0.0018117651343345642 2023-01-22 08:40:55.743172: step: 332/77, loss: 8.461129255010746e-06 2023-01-22 08:40:57.034644: step: 336/77, loss: 1.3149092410458252e-05 2023-01-22 08:40:58.295843: step: 340/77, loss: 0.0001768352958606556 2023-01-22 08:40:59.616966: step: 344/77, loss: 0.000923643063288182 2023-01-22 08:41:00.922564: step: 348/77, loss: 0.03135452792048454 2023-01-22 08:41:02.194286: step: 352/77, loss: 1.5779828572703991e-06 2023-01-22 08:41:03.523310: step: 356/77, loss: 0.0012817180249840021 2023-01-22 08:41:04.867609: step: 360/77, loss: 0.010822507552802563 2023-01-22 08:41:06.157588: step: 364/77, loss: 0.04603930190205574 2023-01-22 08:41:07.465135: step: 368/77, loss: 0.1464909315109253 2023-01-22 08:41:08.742828: step: 372/77, loss: 0.00020046159625053406 2023-01-22 08:41:10.078381: step: 376/77, loss: 0.0011036631185561419 2023-01-22 08:41:11.413680: step: 380/77, loss: 0.0055919550359249115 2023-01-22 08:41:12.750356: step: 384/77, loss: 0.020800791680812836 2023-01-22 08:41:14.034520: step: 388/77, loss: 0.029528755694627762 ================================================== Loss: 0.014 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Chinese: {'template': {'p': 
0.9545454545454546, 'r': 0.4921875, 'f1': 0.6494845360824743}, 'slot': {'p': 0.6071428571428571, 'r': 0.015567765567765568, 'f1': 0.030357142857142853}, 'combined': 0.019716494845360824, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Korean: {'template': {'p': 0.9264705882352942, 'r': 0.4921875, 'f1': 0.6428571428571428}, 'slot': {'p': 0.5666666666666667, 'r': 0.015567765567765568, 'f1': 0.030303030303030307}, 'combined': 0.01948051948051948, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Russian: {'template': {'p': 0.9402985074626866, 'r': 0.4921875, 'f1': 0.6461538461538462}, 'slot': {'p': 0.5666666666666667, 'r': 0.015567765567765568, 'f1': 0.030303030303030307}, 'combined': 0.019580419580419586, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} 
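[Editor's note, not part of the original log] Every evaluation block in this log is consistent with the same arithmetic: each 'f1' is the harmonic mean 2*p*r/(p+r) of the listed precision and recall, and 'combined' is the product of the template F1 and the slot F1 (for the dev split above, 0.7368421052631579 * 0.07029876977152899 gives the logged 0.05179909351586346). The short sketch below reproduces those numbers; it is a minimal illustration only, and the helper names are not taken from train.py.

def f1(p, r):
    # Harmonic mean of precision and recall; taken as 0 when both are 0.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template, slot):
    # 'combined' as it appears in this log: product of template F1 and slot F1.
    return f1(template["p"], template["r"]) * f1(slot["p"], slot["r"])

# Dev-split values copied from the epoch 15 block above:
template = {"p": 1.0, "r": 0.5833333333333334}
slot = {"p": 0.5, "r": 0.03780718336483932}
print(combined_score(template, slot))  # ~0.05179909351586346

Read this way, the 'Sample Korean' rows report combined 0.0 whenever slot precision and recall are both 0: the slot F1 collapses to 0, so the product is 0 regardless of the template score.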
****************************** Epoch: 16 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:43:00.732561: step: 4/77, loss: 1.1221683053008746e-05 2023-01-22 08:43:02.021831: step: 8/77, loss: 3.455118348938413e-05 2023-01-22 08:43:03.345526: step: 12/77, loss: 0.0018443784210830927 2023-01-22 08:43:04.666441: step: 16/77, loss: 0.007893680594861507 2023-01-22 08:43:05.928062: step: 20/77, loss: 0.00171194679569453 2023-01-22 08:43:07.271606: step: 24/77, loss: 0.004791442304849625 2023-01-22 08:43:08.549333: step: 28/77, loss: 0.005079994443804026 2023-01-22 08:43:09.875319: step: 32/77, loss: 0.0013969866558909416 2023-01-22 08:43:11.188907: step: 36/77, loss: 0.0005662287003360689 2023-01-22 08:43:12.509321: step: 40/77, loss: 0.006739386357367039 2023-01-22 08:43:13.781013: step: 44/77, loss: 0.057136066257953644 2023-01-22 08:43:15.067605: step: 48/77, loss: 0.08786522597074509 2023-01-22 08:43:16.386739: step: 52/77, loss: 0.0006959072197787464 2023-01-22 08:43:17.707145: step: 56/77, loss: 8.594767132308334e-05 2023-01-22 08:43:18.999181: step: 60/77, loss: 0.0023656277917325497 2023-01-22 08:43:20.304501: step: 64/77, loss: 9.632251021685079e-06 2023-01-22 08:43:21.653918: step: 68/77, loss: 0.010727917775511742 2023-01-22 08:43:22.979852: step: 72/77, loss: 5.180209336685948e-05 2023-01-22 08:43:24.306495: step: 76/77, loss: 1.1807405826402828e-05 2023-01-22 08:43:25.645313: step: 80/77, loss: 0.007526985835283995 2023-01-22 08:43:27.028760: step: 84/77, loss: 0.03580911085009575 2023-01-22 08:43:28.373700: step: 88/77, loss: 8.36990075185895e-05 2023-01-22 08:43:29.734222: step: 92/77, loss: 0.00021380602265708148 2023-01-22 08:43:31.053695: step: 96/77, loss: 5.718777629226679e-06 2023-01-22 08:43:32.375675: step: 100/77, loss: 0.0013107493286952376 2023-01-22 08:43:33.751121: step: 104/77, loss: 0.0012287443969398737 2023-01-22 08:43:35.076595: step: 108/77, loss: 0.0061523388139903545 2023-01-22 08:43:36.368911: step: 112/77, loss: 0.0012815805384889245 2023-01-22 08:43:37.758290: step: 116/77, loss: 0.020566733554005623 2023-01-22 08:43:39.050921: step: 120/77, loss: 0.002454740460962057 2023-01-22 08:43:40.391373: step: 124/77, loss: 0.0029755199793726206 2023-01-22 08:43:41.718360: step: 128/77, loss: 0.012791633605957031 2023-01-22 08:43:43.079517: step: 132/77, loss: 6.327753362711519e-05 2023-01-22 08:43:44.353314: step: 136/77, loss: 0.008632343262434006 2023-01-22 08:43:45.641236: step: 140/77, loss: 8.667247311677784e-05 2023-01-22 08:43:46.955430: step: 144/77, loss: 7.0642381615471095e-06 2023-01-22 08:43:48.295185: step: 148/77, loss: 0.009543057531118393 2023-01-22 08:43:49.662443: step: 152/77, loss: 6.317175575532019e-06 2023-01-22 08:43:51.004924: step: 156/77, loss: 0.01206673588603735 2023-01-22 08:43:52.312150: step: 160/77, loss: 0.0038095468189567327 2023-01-22 08:43:53.592735: step: 164/77, loss: 0.0035688632633537054 2023-01-22 08:43:54.882395: step: 168/77, loss: 0.0001779919257387519 2023-01-22 08:43:56.235980: step: 172/77, loss: 0.07000274211168289 2023-01-22 08:43:57.566700: step: 176/77, loss: 0.0008181778248399496 2023-01-22 08:43:58.818190: step: 180/77, loss: 5.645014880428789e-06 2023-01-22 08:44:00.133670: step: 184/77, loss: 0.0024146586656570435 2023-01-22 08:44:01.478351: step: 188/77, loss: 0.0016151170711964369 2023-01-22 08:44:02.782572: step: 192/77, 
loss: 0.016128059476614 2023-01-22 08:44:04.102329: step: 196/77, loss: 0.0013105386169627309 2023-01-22 08:44:05.411866: step: 200/77, loss: 0.0015561481704935431 2023-01-22 08:44:06.767104: step: 204/77, loss: 0.0778515636920929 2023-01-22 08:44:08.076854: step: 208/77, loss: 1.0541395567997824e-05 2023-01-22 08:44:09.320712: step: 212/77, loss: 0.0022206148132681847 2023-01-22 08:44:10.625892: step: 216/77, loss: 0.0011248596711084247 2023-01-22 08:44:11.958725: step: 220/77, loss: 7.704282324993983e-05 2023-01-22 08:44:13.300723: step: 224/77, loss: 0.0004228210891596973 2023-01-22 08:44:14.608118: step: 228/77, loss: 0.029599463567137718 2023-01-22 08:44:15.944322: step: 232/77, loss: 0.031230268999934196 2023-01-22 08:44:17.249845: step: 236/77, loss: 0.01613214612007141 2023-01-22 08:44:18.542439: step: 240/77, loss: 0.0005347841652110219 2023-01-22 08:44:19.877990: step: 244/77, loss: 8.026025170693174e-06 2023-01-22 08:44:21.169964: step: 248/77, loss: 0.06191571429371834 2023-01-22 08:44:22.518229: step: 252/77, loss: 1.5007139154477045e-05 2023-01-22 08:44:23.848911: step: 256/77, loss: 0.004064864944666624 2023-01-22 08:44:25.137008: step: 260/77, loss: 0.004756799899041653 2023-01-22 08:44:26.436765: step: 264/77, loss: 0.0007279182900674641 2023-01-22 08:44:27.764474: step: 268/77, loss: 0.0008052777266129851 2023-01-22 08:44:29.094106: step: 272/77, loss: 2.942733544841758e-06 2023-01-22 08:44:30.368535: step: 276/77, loss: 0.009561686776578426 2023-01-22 08:44:31.692510: step: 280/77, loss: 3.643817763077095e-05 2023-01-22 08:44:33.012162: step: 284/77, loss: 2.342433617741335e-06 2023-01-22 08:44:34.332726: step: 288/77, loss: 1.1547286703716964e-05 2023-01-22 08:44:35.636553: step: 292/77, loss: 8.301382877107244e-06 2023-01-22 08:44:36.957474: step: 296/77, loss: 0.011198680847883224 2023-01-22 08:44:38.278156: step: 300/77, loss: 0.00016585066623520106 2023-01-22 08:44:39.586684: step: 304/77, loss: 7.458726759068668e-05 2023-01-22 08:44:40.891666: step: 308/77, loss: 0.00017216156993526965 2023-01-22 08:44:42.186904: step: 312/77, loss: 0.00261271302588284 2023-01-22 08:44:43.480567: step: 316/77, loss: 0.03502047806978226 2023-01-22 08:44:44.764040: step: 320/77, loss: 0.049201589077711105 2023-01-22 08:44:46.104301: step: 324/77, loss: 1.7195624195665005e-06 2023-01-22 08:44:47.405524: step: 328/77, loss: 0.0020926424767822027 2023-01-22 08:44:48.754595: step: 332/77, loss: 0.0003969683893956244 2023-01-22 08:44:50.032554: step: 336/77, loss: 0.007147402036935091 2023-01-22 08:44:51.379711: step: 340/77, loss: 2.195253546233289e-05 2023-01-22 08:44:52.704932: step: 344/77, loss: 0.00015550327952951193 2023-01-22 08:44:54.034290: step: 348/77, loss: 0.0010230513289570808 2023-01-22 08:44:55.320477: step: 352/77, loss: 0.055711206048727036 2023-01-22 08:44:56.643692: step: 356/77, loss: 0.16461989283561707 2023-01-22 08:44:57.933197: step: 360/77, loss: 0.0035247791092842817 2023-01-22 08:44:59.293034: step: 364/77, loss: 0.0020639460999518633 2023-01-22 08:45:00.622400: step: 368/77, loss: 0.0005306448438204825 2023-01-22 08:45:01.998342: step: 372/77, loss: 0.0008472758927382529 2023-01-22 08:45:03.339511: step: 376/77, loss: 0.00011176289262948558 2023-01-22 08:45:04.634979: step: 380/77, loss: 0.01918557472527027 2023-01-22 08:45:05.934718: step: 384/77, loss: 0.0978318601846695 2023-01-22 08:45:07.222316: step: 388/77, loss: 0.0711839497089386 ================================================== Loss: 0.012 -------------------- Dev Chinese: {'template': {'p': 1.0, 
'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Chinese: {'template': {'p': 0.9315068493150684, 'r': 0.53125, 'f1': 0.6766169154228856}, 'slot': {'p': 0.6, 'r': 0.013736263736263736, 'f1': 0.026857654431512987}, 'combined': 0.018172343296944112, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Korean: {'template': {'p': 0.9178082191780822, 'r': 0.5234375, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5555555555555556, 'r': 0.013736263736263736, 'f1': 0.026809651474530835}, 'combined': 0.017873100983020557, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Russian: {'template': {'p': 0.9444444444444444, 'r': 0.53125, 'f1': 0.6799999999999999}, 'slot': {'p': 0.6, 'r': 0.013736263736263736, 'f1': 0.026857654431512987}, 'combined': 0.01826320501342883, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 
'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 17 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:46:54.088511: step: 4/77, loss: 0.0006470663356594741 2023-01-22 08:46:55.425209: step: 8/77, loss: 0.001525210915133357 2023-01-22 08:46:56.702296: step: 12/77, loss: 3.1704777939012274e-05 2023-01-22 08:46:57.989876: step: 16/77, loss: 0.038258712738752365 2023-01-22 08:46:59.284519: step: 20/77, loss: 0.020408688113093376 2023-01-22 08:47:00.614052: step: 24/77, loss: 9.983767768062535e-08 2023-01-22 08:47:01.925071: step: 28/77, loss: 0.006725261453539133 2023-01-22 08:47:03.228877: step: 32/77, loss: 0.005994974169880152 2023-01-22 08:47:04.563583: step: 36/77, loss: 0.06461626291275024 2023-01-22 08:47:05.916238: step: 40/77, loss: 0.0004826833610422909 2023-01-22 08:47:07.234067: step: 44/77, loss: 0.0012849881313741207 2023-01-22 08:47:08.574623: step: 48/77, loss: 8.283742499770597e-05 2023-01-22 08:47:09.884062: step: 52/77, loss: 1.056480641636881e-06 2023-01-22 08:47:11.177947: step: 56/77, loss: 0.0065393103286623955 2023-01-22 08:47:12.556072: step: 60/77, loss: 0.0013470399426296353 2023-01-22 08:47:13.833487: step: 64/77, loss: 0.022369032725691795 2023-01-22 08:47:15.166683: step: 68/77, loss: 0.015005374327301979 2023-01-22 08:47:16.471711: step: 72/77, loss: 0.05512889474630356 2023-01-22 08:47:17.739931: step: 76/77, loss: 0.004738078452646732 2023-01-22 08:47:19.102016: step: 80/77, loss: 0.01645592227578163 2023-01-22 08:47:20.335150: step: 84/77, loss: 3.1562663934892043e-05 2023-01-22 08:47:21.645992: step: 88/77, loss: 0.005582800600677729 2023-01-22 08:47:22.962976: step: 92/77, loss: 7.547959830844775e-05 2023-01-22 08:47:24.278796: step: 96/77, loss: 0.006869758944958448 2023-01-22 08:47:25.520095: step: 100/77, loss: 0.0005420552333816886 2023-01-22 08:47:26.791356: step: 104/77, loss: 3.939624548365828e-06 2023-01-22 08:47:28.057408: step: 108/77, loss: 0.005915517918765545 2023-01-22 08:47:29.351995: step: 112/77, loss: 0.005799043457955122 2023-01-22 08:47:30.704917: step: 116/77, loss: 0.0006891103694215417 2023-01-22 08:47:32.043356: step: 120/77, loss: 0.01312168687582016 2023-01-22 08:47:33.396318: step: 124/77, loss: 0.0011892368784174323 2023-01-22 08:47:34.733811: step: 128/77, loss: 0.0014223118778318167 2023-01-22 08:47:36.075872: step: 132/77, loss: 0.0015876988181844354 2023-01-22 08:47:37.392719: step: 136/77, loss: 7.348333019763231e-05 2023-01-22 08:47:38.774944: step: 140/77, loss: 0.015635933727025986 2023-01-22 08:47:40.063895: step: 144/77, loss: 0.008616279810667038 2023-01-22 08:47:41.410341: step: 148/77, loss: 0.0005551224458031356 2023-01-22 08:47:42.792007: step: 152/77, loss: 0.00016584055265411735 2023-01-22 08:47:44.088457: step: 156/77, loss: 0.047848138958215714 2023-01-22 08:47:45.375941: step: 160/77, loss: 0.0003821274731308222 2023-01-22 08:47:46.660195: step: 164/77, loss: 0.003461926942691207 2023-01-22 08:47:48.020253: step: 168/77, loss: 0.020093290135264397 2023-01-22 08:47:49.332972: step: 172/77, loss: 0.011807311326265335 2023-01-22 08:47:50.641808: step: 176/77, loss: 0.0002801486407406628 2023-01-22 08:47:51.956403: step: 180/77, loss: 2.6831652576220222e-05 2023-01-22 08:47:53.327094: step: 
184/77, loss: 0.08000031113624573 2023-01-22 08:47:54.632214: step: 188/77, loss: 3.856466355500743e-05 2023-01-22 08:47:55.984169: step: 192/77, loss: 0.0019287059549242258 2023-01-22 08:47:57.319298: step: 196/77, loss: 0.014379382133483887 2023-01-22 08:47:58.652589: step: 200/77, loss: 0.0005766113172285259 2023-01-22 08:47:59.985465: step: 204/77, loss: 5.584115206147544e-05 2023-01-22 08:48:01.317570: step: 208/77, loss: 0.0003904126351699233 2023-01-22 08:48:02.630081: step: 212/77, loss: 4.62036359749618e-06 2023-01-22 08:48:03.922378: step: 216/77, loss: 0.0032101867254823446 2023-01-22 08:48:05.245763: step: 220/77, loss: 0.0033433244097977877 2023-01-22 08:48:06.640457: step: 224/77, loss: 0.020648961886763573 2023-01-22 08:48:07.955842: step: 228/77, loss: 0.0001718879648251459 2023-01-22 08:48:09.221539: step: 232/77, loss: 0.004351540934294462 2023-01-22 08:48:10.578918: step: 236/77, loss: 0.0005007802392356098 2023-01-22 08:48:11.904149: step: 240/77, loss: 0.00028103828663006425 2023-01-22 08:48:13.208882: step: 244/77, loss: 0.023118944838643074 2023-01-22 08:48:14.523211: step: 248/77, loss: 0.007660750299692154 2023-01-22 08:48:15.857494: step: 252/77, loss: 2.7252701784163946e-06 2023-01-22 08:48:17.215080: step: 256/77, loss: 0.0017567173345014453 2023-01-22 08:48:18.505414: step: 260/77, loss: 0.0019385181367397308 2023-01-22 08:48:19.763368: step: 264/77, loss: 4.470348091700771e-09 2023-01-22 08:48:21.069096: step: 268/77, loss: 0.0003453208482824266 2023-01-22 08:48:22.413075: step: 272/77, loss: 0.021467359736561775 2023-01-22 08:48:23.710747: step: 276/77, loss: 0.01447715051472187 2023-01-22 08:48:25.026301: step: 280/77, loss: 0.008323321118950844 2023-01-22 08:48:26.299452: step: 284/77, loss: 0.043272241950035095 2023-01-22 08:48:27.642486: step: 288/77, loss: 0.014325212687253952 2023-01-22 08:48:28.988773: step: 292/77, loss: 0.0008731039706617594 2023-01-22 08:48:30.341393: step: 296/77, loss: 0.11861124634742737 2023-01-22 08:48:31.610237: step: 300/77, loss: 0.03012103959918022 2023-01-22 08:48:32.917039: step: 304/77, loss: 0.04930185526609421 2023-01-22 08:48:34.208560: step: 308/77, loss: 5.2927560318494216e-05 2023-01-22 08:48:35.504349: step: 312/77, loss: 0.03037060610949993 2023-01-22 08:48:36.849416: step: 316/77, loss: 0.000295668316539377 2023-01-22 08:48:38.121489: step: 320/77, loss: 0.001755043282173574 2023-01-22 08:48:39.475183: step: 324/77, loss: 9.944363409886137e-05 2023-01-22 08:48:40.825199: step: 328/77, loss: 0.02383231185376644 2023-01-22 08:48:42.167192: step: 332/77, loss: 0.0026795738376677036 2023-01-22 08:48:43.441806: step: 336/77, loss: 0.022737769410014153 2023-01-22 08:48:44.745902: step: 340/77, loss: 0.0007600174867548048 2023-01-22 08:48:46.044519: step: 344/77, loss: 2.845424387487583e-05 2023-01-22 08:48:47.425956: step: 348/77, loss: 0.0003975990694016218 2023-01-22 08:48:48.734228: step: 352/77, loss: 0.0001918773486977443 2023-01-22 08:48:50.011444: step: 356/77, loss: 0.018601376563310623 2023-01-22 08:48:51.312552: step: 360/77, loss: 0.01445725467056036 2023-01-22 08:48:52.636014: step: 364/77, loss: 0.0005568335764110088 2023-01-22 08:48:53.977997: step: 368/77, loss: 2.918254176620394e-05 2023-01-22 08:48:55.282969: step: 372/77, loss: 0.0012686774134635925 2023-01-22 08:48:56.562698: step: 376/77, loss: 0.035819850862026215 2023-01-22 08:48:57.858463: step: 380/77, loss: 0.00040851483936421573 2023-01-22 08:48:59.177370: step: 384/77, loss: 0.0002468058664817363 2023-01-22 08:49:00.473880: step: 388/77, 
loss: 0.00334012508392334 ================================================== Loss: 0.011 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Chinese: {'template': {'p': 0.9411764705882353, 'r': 0.5, 'f1': 0.6530612244897959}, 'slot': {'p': 0.4838709677419355, 'r': 0.013736263736263736, 'f1': 0.026714158504007122}, 'combined': 0.017445981063841386, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Korean: {'template': {'p': 0.9411764705882353, 'r': 0.5, 'f1': 0.6530612244897959}, 'slot': {'p': 0.4838709677419355, 'r': 0.013736263736263736, 'f1': 0.026714158504007122}, 'combined': 0.017445981063841386, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Russian: {'template': {'p': 0.9411764705882353, 'r': 0.5, 'f1': 0.6530612244897959}, 'slot': {'p': 0.46875, 'r': 0.013736263736263736, 'f1': 0.02669039145907473}, 'combined': 0.01743045972837533, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': 
{'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 18 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:50:46.998902: step: 4/77, loss: 0.006145583000034094 2023-01-22 08:50:48.287538: step: 8/77, loss: 0.06270524114370346 2023-01-22 08:50:49.623657: step: 12/77, loss: 0.012457462027668953 2023-01-22 08:50:50.931253: step: 16/77, loss: 4.878042818745598e-05 2023-01-22 08:50:52.197400: step: 20/77, loss: 0.005514089018106461 2023-01-22 08:50:53.499852: step: 24/77, loss: 0.0011011639144271612 2023-01-22 08:50:54.824043: step: 28/77, loss: 0.002201240276917815 2023-01-22 08:50:56.149802: step: 32/77, loss: 0.09032203257083893 2023-01-22 08:50:57.494041: step: 36/77, loss: 0.0004244197625666857 2023-01-22 08:50:58.826734: step: 40/77, loss: 0.0036006891168653965 2023-01-22 08:51:00.102661: step: 44/77, loss: 0.0006497090798802674 2023-01-22 08:51:01.423471: step: 48/77, loss: 0.0001267041079699993 2023-01-22 08:51:02.704702: step: 52/77, loss: 0.0015523693291470408 2023-01-22 08:51:03.970840: step: 56/77, loss: 6.539768037328031e-06 2023-01-22 08:51:05.258943: step: 60/77, loss: 0.01422657910734415 2023-01-22 08:51:06.588693: step: 64/77, loss: 0.0015929980436339974 2023-01-22 08:51:07.877312: step: 68/77, loss: 0.0010093044256791472 2023-01-22 08:51:09.186398: step: 72/77, loss: 0.1789524108171463 2023-01-22 08:51:10.505974: step: 76/77, loss: 0.007612825371325016 2023-01-22 08:51:11.817084: step: 80/77, loss: 0.006012186408042908 2023-01-22 08:51:13.153734: step: 84/77, loss: 0.0017496285727247596 2023-01-22 08:51:14.476025: step: 88/77, loss: 0.00408580806106329 2023-01-22 08:51:15.791833: step: 92/77, loss: 0.040068477392196655 2023-01-22 08:51:17.077000: step: 96/77, loss: 0.0015697141643613577 2023-01-22 08:51:18.374199: step: 100/77, loss: 0.017410293221473694 2023-01-22 08:51:19.670067: step: 104/77, loss: 2.817763061102596e-06 2023-01-22 08:51:20.973756: step: 108/77, loss: 0.019066637381911278 2023-01-22 08:51:22.256712: step: 112/77, loss: 0.011317798867821693 2023-01-22 08:51:23.562542: step: 116/77, loss: 0.026462309062480927 2023-01-22 08:51:24.885346: step: 120/77, loss: 5.888029409106821e-05 2023-01-22 08:51:26.201371: step: 124/77, loss: 0.0030098133720457554 2023-01-22 08:51:27.526841: step: 128/77, loss: 0.0036532857920974493 2023-01-22 08:51:28.816191: step: 132/77, loss: 0.04920339956879616 2023-01-22 08:51:30.215286: step: 136/77, loss: 0.01856566034257412 2023-01-22 08:51:31.563411: step: 140/77, loss: 6.372792995534837e-05 2023-01-22 08:51:32.906464: step: 144/77, loss: 8.016057108761743e-06 2023-01-22 08:51:34.238342: step: 148/77, loss: 0.0004995768540538847 2023-01-22 08:51:35.545930: step: 152/77, loss: 0.06058551371097565 2023-01-22 08:51:36.843715: step: 156/77, loss: 3.7383480957942083e-05 2023-01-22 08:51:38.083142: step: 160/77, loss: 0.003941821400076151 2023-01-22 08:51:39.416285: step: 164/77, loss: 0.0005088653997518122 2023-01-22 08:51:40.734989: step: 168/77, loss: 1.7664906408754177e-05 2023-01-22 08:51:42.101971: step: 172/77, loss: 0.0021961110178381205 2023-01-22 08:51:43.433992: 
step: 176/77, loss: 0.008128118701279163 2023-01-22 08:51:44.771421: step: 180/77, loss: 0.033387646079063416 2023-01-22 08:51:46.063343: step: 184/77, loss: 0.014834368601441383 2023-01-22 08:51:47.439371: step: 188/77, loss: 0.0006523691117763519 2023-01-22 08:51:48.732716: step: 192/77, loss: 0.01223810575902462 2023-01-22 08:51:50.094345: step: 196/77, loss: 0.0009100798051804304 2023-01-22 08:51:51.422202: step: 200/77, loss: 0.0030384385026991367 2023-01-22 08:51:52.779767: step: 204/77, loss: 0.004068047273904085 2023-01-22 08:51:54.094892: step: 208/77, loss: 0.0010591279715299606 2023-01-22 08:51:55.431625: step: 212/77, loss: 0.00010257431131321937 2023-01-22 08:51:56.744302: step: 216/77, loss: 0.00011126314348075539 2023-01-22 08:51:58.044864: step: 220/77, loss: 4.5440105168381706e-05 2023-01-22 08:51:59.346976: step: 224/77, loss: 0.017476027831435204 2023-01-22 08:52:00.713837: step: 228/77, loss: 0.028088154271245003 2023-01-22 08:52:02.021871: step: 232/77, loss: 7.048526458675042e-05 2023-01-22 08:52:03.392043: step: 236/77, loss: 0.004286248702555895 2023-01-22 08:52:04.724129: step: 240/77, loss: 8.81606865732465e-06 2023-01-22 08:52:06.071667: step: 244/77, loss: 0.0001825095241656527 2023-01-22 08:52:07.390358: step: 248/77, loss: 0.001119068474508822 2023-01-22 08:52:08.706262: step: 252/77, loss: 3.7998961488483474e-05 2023-01-22 08:52:10.086073: step: 256/77, loss: 0.0009253112366423011 2023-01-22 08:52:11.335748: step: 260/77, loss: 0.0012064689071848989 2023-01-22 08:52:12.679026: step: 264/77, loss: 5.0296126573812217e-05 2023-01-22 08:52:13.988976: step: 268/77, loss: 7.039660704322159e-05 2023-01-22 08:52:15.290304: step: 272/77, loss: 0.009951543062925339 2023-01-22 08:52:16.604321: step: 276/77, loss: 0.00021127743821125478 2023-01-22 08:52:17.931887: step: 280/77, loss: 5.046322985435836e-05 2023-01-22 08:52:19.282152: step: 284/77, loss: 0.03399345651268959 2023-01-22 08:52:20.585063: step: 288/77, loss: 0.0733359232544899 2023-01-22 08:52:21.892616: step: 292/77, loss: 0.002043990883976221 2023-01-22 08:52:23.235464: step: 296/77, loss: 0.030689438804984093 2023-01-22 08:52:24.587768: step: 300/77, loss: 0.005732610356062651 2023-01-22 08:52:25.925933: step: 304/77, loss: 1.6658834283589385e-05 2023-01-22 08:52:27.255940: step: 308/77, loss: 0.050420764833688736 2023-01-22 08:52:28.517299: step: 312/77, loss: 0.001651364378631115 2023-01-22 08:52:29.840096: step: 316/77, loss: 0.00013521264190785587 2023-01-22 08:52:31.158629: step: 320/77, loss: 5.329089981387369e-05 2023-01-22 08:52:32.467664: step: 324/77, loss: 0.00023122054699342698 2023-01-22 08:52:33.786663: step: 328/77, loss: 0.0006129042012616992 2023-01-22 08:52:35.102450: step: 332/77, loss: 0.007153256330639124 2023-01-22 08:52:36.372351: step: 336/77, loss: 0.00283637223765254 2023-01-22 08:52:37.698566: step: 340/77, loss: 0.009734341874718666 2023-01-22 08:52:39.003414: step: 344/77, loss: 0.0005088862963020802 2023-01-22 08:52:40.329776: step: 348/77, loss: 1.630113615647133e-06 2023-01-22 08:52:41.683231: step: 352/77, loss: 0.0002557302941568196 2023-01-22 08:52:42.998430: step: 356/77, loss: 4.768367034557741e-08 2023-01-22 08:52:44.300679: step: 360/77, loss: 4.550819721771404e-05 2023-01-22 08:52:45.609757: step: 364/77, loss: 3.684824150695931e-06 2023-01-22 08:52:46.927867: step: 368/77, loss: 0.001974704908207059 2023-01-22 08:52:48.232687: step: 372/77, loss: 0.00014736379671376199 2023-01-22 08:52:49.520383: step: 376/77, loss: 0.001268755062483251 2023-01-22 08:52:50.800622: 
step: 380/77, loss: 0.0002300547348568216 2023-01-22 08:52:52.137180: step: 384/77, loss: 0.005775130353868008 2023-01-22 08:52:53.414197: step: 388/77, loss: 7.426962110912427e-05 ================================================== Loss: 0.011 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Chinese: {'template': {'p': 0.9661016949152542, 'r': 0.4453125, 'f1': 0.6096256684491977}, 'slot': {'p': 0.5161290322580645, 'r': 0.014652014652014652, 'f1': 0.028495102404274268}, 'combined': 0.017371345850734042, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Korean: {'template': {'p': 0.9661016949152542, 'r': 0.4453125, 'f1': 0.6096256684491977}, 'slot': {'p': 0.5, 'r': 0.014652014652014652, 'f1': 0.028469750889679717}, 'combined': 0.01735589091670314, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Russian: {'template': {'p': 0.9661016949152542, 'r': 0.4453125, 'f1': 0.6096256684491977}, 'slot': {'p': 0.5, 'r': 0.013736263736263736, 'f1': 0.026737967914438505}, 'combined': 0.016300151562812774, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: 
{'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 19 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:54:40.181965: step: 4/77, loss: 0.0010036565363407135 2023-01-22 08:54:41.520715: step: 8/77, loss: 0.002950843423604965 2023-01-22 08:54:42.816930: step: 12/77, loss: 0.0068945009261369705 2023-01-22 08:54:44.101360: step: 16/77, loss: 0.03682752698659897 2023-01-22 08:54:45.449843: step: 20/77, loss: 8.210972737288103e-05 2023-01-22 08:54:46.816282: step: 24/77, loss: 0.00015185496886260808 2023-01-22 08:54:48.126290: step: 28/77, loss: 0.0012898561544716358 2023-01-22 08:54:49.441538: step: 32/77, loss: 0.049722883850336075 2023-01-22 08:54:50.747672: step: 36/77, loss: 0.0025275088846683502 2023-01-22 08:54:52.077912: step: 40/77, loss: 0.0003120983310509473 2023-01-22 08:54:53.370265: step: 44/77, loss: 0.00011641572928056121 2023-01-22 08:54:54.692284: step: 48/77, loss: 0.0001375876454403624 2023-01-22 08:54:56.035232: step: 52/77, loss: 0.0001678016851656139 2023-01-22 08:54:57.407557: step: 56/77, loss: 0.025235647335648537 2023-01-22 08:54:58.661519: step: 60/77, loss: 0.01438209693878889 2023-01-22 08:54:59.995131: step: 64/77, loss: 0.0008003878756426275 2023-01-22 08:55:01.307858: step: 68/77, loss: 8.286495722131804e-05 2023-01-22 08:55:02.614746: step: 72/77, loss: 4.659318165067816e-06 2023-01-22 08:55:03.901511: step: 76/77, loss: 1.2725478882202879e-05 2023-01-22 08:55:05.197324: step: 80/77, loss: 5.763382068835199e-05 2023-01-22 08:55:06.499507: step: 84/77, loss: 0.00048236100701615214 2023-01-22 08:55:07.869880: step: 88/77, loss: 6.148970715003088e-05 2023-01-22 08:55:09.186811: step: 92/77, loss: 0.00012766192958224565 2023-01-22 08:55:10.512686: step: 96/77, loss: 0.041304394602775574 2023-01-22 08:55:11.743490: step: 100/77, loss: 0.0013864014763385057 2023-01-22 08:55:13.044871: step: 104/77, loss: 0.019462674856185913 2023-01-22 08:55:14.429850: step: 108/77, loss: 6.260276131797582e-05 2023-01-22 08:55:15.711721: step: 112/77, loss: 0.006182023324072361 2023-01-22 08:55:16.984571: step: 116/77, loss: 0.003532269038259983 2023-01-22 08:55:18.286086: step: 120/77, loss: 0.004249984864145517 2023-01-22 08:55:19.641091: step: 124/77, loss: 0.0005928549799136817 2023-01-22 08:55:20.951646: step: 128/77, loss: 0.0006586603703908622 2023-01-22 08:55:22.263976: step: 132/77, loss: 0.0019883771892637014 2023-01-22 08:55:23.563689: step: 136/77, loss: 0.0007436954183503985 2023-01-22 08:55:24.882865: step: 140/77, loss: 0.06668004393577576 2023-01-22 08:55:26.199519: step: 144/77, loss: 2.946595850517042e-05 2023-01-22 08:55:27.528945: step: 148/77, loss: 0.044872432947158813 2023-01-22 08:55:28.828081: step: 152/77, loss: 7.701337744947523e-05 2023-01-22 08:55:30.195937: step: 156/77, loss: 0.00036041653947904706 2023-01-22 08:55:31.523785: step: 160/77, loss: 0.002154907677322626 2023-01-22 08:55:32.847913: step: 164/77, loss: 5.0706959882518277e-05 2023-01-22 08:55:34.155989: step: 168/77, loss: 
0.021075882017612457 2023-01-22 08:55:35.517624: step: 172/77, loss: 7.449004624504596e-05 2023-01-22 08:55:36.822936: step: 176/77, loss: 0.0007037436589598656 2023-01-22 08:55:38.167453: step: 180/77, loss: 0.005983549170196056 2023-01-22 08:55:39.559651: step: 184/77, loss: 0.00024994040722958744 2023-01-22 08:55:40.878273: step: 188/77, loss: 0.0027954450342804193 2023-01-22 08:55:42.176398: step: 192/77, loss: 1.932227132783737e-05 2023-01-22 08:55:43.504568: step: 196/77, loss: 8.82583117345348e-05 2023-01-22 08:55:44.839669: step: 200/77, loss: 0.012760159559547901 2023-01-22 08:55:46.155796: step: 204/77, loss: 0.021436506882309914 2023-01-22 08:55:47.455245: step: 208/77, loss: 0.01409657672047615 2023-01-22 08:55:48.741160: step: 212/77, loss: 0.0010179778328165412 2023-01-22 08:55:50.054674: step: 216/77, loss: 0.04306625947356224 2023-01-22 08:55:51.377019: step: 220/77, loss: 0.04988136142492294 2023-01-22 08:55:52.670663: step: 224/77, loss: 0.0002051626070169732 2023-01-22 08:55:54.004426: step: 228/77, loss: 4.13992784160655e-05 2023-01-22 08:55:55.341749: step: 232/77, loss: 2.0824325474677607e-05 2023-01-22 08:55:56.680358: step: 236/77, loss: 0.03562576696276665 2023-01-22 08:55:58.005101: step: 240/77, loss: 2.7725507607101463e-05 2023-01-22 08:55:59.272868: step: 244/77, loss: 0.01949121057987213 2023-01-22 08:56:00.574352: step: 248/77, loss: 8.667161637276877e-06 2023-01-22 08:56:01.913698: step: 252/77, loss: 0.012959137558937073 2023-01-22 08:56:03.169986: step: 256/77, loss: 0.00032826358801685274 2023-01-22 08:56:04.496578: step: 260/77, loss: 0.11627621948719025 2023-01-22 08:56:05.806953: step: 264/77, loss: 0.09269588440656662 2023-01-22 08:56:07.102162: step: 268/77, loss: 1.3629623026645277e-05 2023-01-22 08:56:08.442792: step: 272/77, loss: 0.003814670955762267 2023-01-22 08:56:09.745386: step: 276/77, loss: 0.001455141231417656 2023-01-22 08:56:11.070155: step: 280/77, loss: 0.0021209963597357273 2023-01-22 08:56:12.413152: step: 284/77, loss: 0.00024511825176887214 2023-01-22 08:56:13.741829: step: 288/77, loss: 0.0009873814415186644 2023-01-22 08:56:15.051217: step: 292/77, loss: 2.3900884116301313e-06 2023-01-22 08:56:16.371658: step: 296/77, loss: 0.009080913849174976 2023-01-22 08:56:17.674031: step: 300/77, loss: 0.00018391606863588095 2023-01-22 08:56:18.942342: step: 304/77, loss: 0.00011140089191030711 2023-01-22 08:56:20.229572: step: 308/77, loss: 0.001048402744345367 2023-01-22 08:56:21.493787: step: 312/77, loss: 0.015014705248177052 2023-01-22 08:56:22.815300: step: 316/77, loss: 0.0017719214083626866 2023-01-22 08:56:24.127076: step: 320/77, loss: 0.00010908178228419274 2023-01-22 08:56:25.433394: step: 324/77, loss: 0.006568096112459898 2023-01-22 08:56:26.758453: step: 328/77, loss: 5.54263788217213e-05 2023-01-22 08:56:28.078166: step: 332/77, loss: 0.00237331073731184 2023-01-22 08:56:29.443208: step: 336/77, loss: 0.02834990620613098 2023-01-22 08:56:30.720910: step: 340/77, loss: 0.0236161220818758 2023-01-22 08:56:32.072607: step: 344/77, loss: 0.000626914668828249 2023-01-22 08:56:33.396557: step: 348/77, loss: 0.005827236454933882 2023-01-22 08:56:34.704544: step: 352/77, loss: 0.0011040312238037586 2023-01-22 08:56:36.072765: step: 356/77, loss: 9.292290633311495e-05 2023-01-22 08:56:37.403816: step: 360/77, loss: 2.6329313186579384e-05 2023-01-22 08:56:38.748695: step: 364/77, loss: 0.00045233272248879075 2023-01-22 08:56:40.042158: step: 368/77, loss: 0.00047104203258641064 2023-01-22 08:56:41.358267: step: 372/77, loss: 
0.022833313792943954 2023-01-22 08:56:42.639044: step: 376/77, loss: 0.0005413969047367573 2023-01-22 08:56:43.889762: step: 380/77, loss: 0.00017948381719179451 2023-01-22 08:56:45.132587: step: 384/77, loss: 0.01384049467742443 2023-01-22 08:56:46.444444: step: 388/77, loss: 0.0009254040778614581 ================================================== Loss: 0.010 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Chinese: {'template': {'p': 0.9344262295081968, 'r': 0.4453125, 'f1': 0.6031746031746033}, 'slot': {'p': 0.45714285714285713, 'r': 0.014652014652014652, 'f1': 0.02839396628216504}, 'combined': 0.017126519344797964, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Korean: {'template': {'p': 0.9344262295081968, 'r': 0.4453125, 'f1': 0.6031746031746033}, 'slot': {'p': 0.47058823529411764, 'r': 0.014652014652014652, 'f1': 0.02841918294849023}, 'combined': 0.017141729397502043, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Russian: {'template': {'p': 0.9354838709677419, 'r': 0.453125, 'f1': 0.6105263157894737}, 'slot': {'p': 0.48484848484848486, 'r': 0.014652014652014652, 'f1': 0.028444444444444442}, 'combined': 0.01736608187134503, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 19} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: 
{'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 20 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:58:32.905811: step: 4/77, loss: 0.0007864898070693016 2023-01-22 08:58:34.221057: step: 8/77, loss: 0.010362272150814533 2023-01-22 08:58:35.577512: step: 12/77, loss: 0.006819052156060934 2023-01-22 08:58:36.833453: step: 16/77, loss: 2.1676016331184655e-05 2023-01-22 08:58:38.120976: step: 20/77, loss: 0.0011354070156812668 2023-01-22 08:58:39.395874: step: 24/77, loss: 2.8951749300176743e-06 2023-01-22 08:58:40.734652: step: 28/77, loss: 0.0001309202198171988 2023-01-22 08:58:42.070628: step: 32/77, loss: 0.003243007929995656 2023-01-22 08:58:43.359526: step: 36/77, loss: 0.002287351293489337 2023-01-22 08:58:44.670469: step: 40/77, loss: 7.462866778951138e-05 2023-01-22 08:58:45.916357: step: 44/77, loss: 0.0029088289011269808 2023-01-22 08:58:47.229048: step: 48/77, loss: 0.00041750201489776373 2023-01-22 08:58:48.574113: step: 52/77, loss: 0.0008609117357991636 2023-01-22 08:58:49.875707: step: 56/77, loss: 0.0004486891266424209 2023-01-22 08:58:51.217475: step: 60/77, loss: 0.0018713414901867509 2023-01-22 08:58:52.539806: step: 64/77, loss: 0.006790338084101677 2023-01-22 08:58:53.862510: step: 68/77, loss: 0.0003210757568012923 2023-01-22 08:58:55.180459: step: 72/77, loss: 0.08046431094408035 2023-01-22 08:58:56.500507: step: 76/77, loss: 3.1100178603082895e-05 2023-01-22 08:58:57.846289: step: 80/77, loss: 0.0002698895405046642 2023-01-22 08:58:59.142526: step: 84/77, loss: 0.0010851433034986258 2023-01-22 08:59:00.460956: step: 88/77, loss: 0.011157970875501633 2023-01-22 08:59:01.767936: step: 92/77, loss: 0.0004891256103292108 2023-01-22 08:59:03.093537: step: 96/77, loss: 0.03444315120577812 2023-01-22 08:59:04.419327: step: 100/77, loss: 0.0015314036281779408 2023-01-22 08:59:05.737191: step: 104/77, loss: 0.006588106043636799 2023-01-22 08:59:07.060064: step: 108/77, loss: 0.010639192536473274 2023-01-22 08:59:08.388492: step: 112/77, loss: 6.6985526245844085e-06 2023-01-22 08:59:09.688937: step: 116/77, loss: 7.102488598320633e-05 2023-01-22 08:59:11.008615: step: 120/77, loss: 0.001098927459679544 2023-01-22 08:59:12.319839: step: 124/77, loss: 2.5048670067917556e-05 2023-01-22 08:59:13.572589: step: 128/77, loss: 6.630965003751044e-07 2023-01-22 08:59:14.925332: step: 132/77, loss: 5.726329982280731e-05 2023-01-22 08:59:16.236554: step: 136/77, loss: 0.00011796267790487036 2023-01-22 08:59:17.572272: step: 140/77, loss: 0.0009620689670555294 2023-01-22 08:59:18.872626: step: 144/77, loss: 0.0058373697102069855 2023-01-22 08:59:20.186920: step: 148/77, loss: 0.0002568000345490873 2023-01-22 08:59:21.484354: step: 152/77, loss: 0.0041648694314062595 2023-01-22 08:59:22.824266: step: 156/77, loss: 
0.01177394948899746 2023-01-22 08:59:24.134559: step: 160/77, loss: 0.000455582223366946 2023-01-22 08:59:25.477593: step: 164/77, loss: 0.0003266449202783406 2023-01-22 08:59:26.782808: step: 168/77, loss: 0.00044424354564398527 2023-01-22 08:59:28.094180: step: 172/77, loss: 1.743532993714325e-05 2023-01-22 08:59:29.417466: step: 176/77, loss: 0.0009272638126276433 2023-01-22 08:59:30.787555: step: 180/77, loss: 1.468725622544298e-05 2023-01-22 08:59:32.142530: step: 184/77, loss: 0.0016138491919264197 2023-01-22 08:59:33.448005: step: 188/77, loss: 1.6808103282528464e-06 2023-01-22 08:59:34.767442: step: 192/77, loss: 0.001134151709266007 2023-01-22 08:59:36.061678: step: 196/77, loss: 0.004834283143281937 2023-01-22 08:59:37.406933: step: 200/77, loss: 0.037488024681806564 2023-01-22 08:59:38.736180: step: 204/77, loss: 1.0326210713174078e-06 2023-01-22 08:59:40.033363: step: 208/77, loss: 0.006991858594119549 2023-01-22 08:59:41.365348: step: 212/77, loss: 7.4390841291460674e-06 2023-01-22 08:59:42.707363: step: 216/77, loss: 2.5584324703231687e-06 2023-01-22 08:59:44.010306: step: 220/77, loss: 0.0009339148527942598 2023-01-22 08:59:45.297002: step: 224/77, loss: 0.027637531980872154 2023-01-22 08:59:46.652209: step: 228/77, loss: 5.2310802857391536e-05 2023-01-22 08:59:47.994906: step: 232/77, loss: 4.627784073818475e-05 2023-01-22 08:59:49.278488: step: 236/77, loss: 0.0001499750214861706 2023-01-22 08:59:50.582581: step: 240/77, loss: 0.0003451184311416 2023-01-22 08:59:51.823233: step: 244/77, loss: 0.025616183876991272 2023-01-22 08:59:53.099916: step: 248/77, loss: 0.015998056158423424 2023-01-22 08:59:54.464293: step: 252/77, loss: 0.010580199770629406 2023-01-22 08:59:55.781640: step: 256/77, loss: 0.04575950279831886 2023-01-22 08:59:57.099901: step: 260/77, loss: 7.59999529691413e-05 2023-01-22 08:59:58.382299: step: 264/77, loss: 0.020015671849250793 2023-01-22 08:59:59.680499: step: 268/77, loss: 2.7577889341046102e-05 2023-01-22 09:00:01.010652: step: 272/77, loss: 9.327914085588418e-07 2023-01-22 09:00:02.306156: step: 276/77, loss: 0.00043893905240111053 2023-01-22 09:00:03.633219: step: 280/77, loss: 3.862224912154488e-05 2023-01-22 09:00:04.964032: step: 284/77, loss: 0.006454653572291136 2023-01-22 09:00:06.344099: step: 288/77, loss: 2.1047469999757595e-05 2023-01-22 09:00:07.648771: step: 292/77, loss: 0.0005261976039037108 2023-01-22 09:00:08.979198: step: 296/77, loss: 4.018589152110508e-06 2023-01-22 09:00:10.246465: step: 300/77, loss: 0.001395822619087994 2023-01-22 09:00:11.559622: step: 304/77, loss: 0.00048785883700475097 2023-01-22 09:00:12.887324: step: 308/77, loss: 0.0035083997063338757 2023-01-22 09:00:14.263625: step: 312/77, loss: 8.830250590108335e-05 2023-01-22 09:00:15.616691: step: 316/77, loss: 0.0009523604530841112 2023-01-22 09:00:16.954592: step: 320/77, loss: 1.5574347344227135e-05 2023-01-22 09:00:18.267687: step: 324/77, loss: 0.00030986740603111684 2023-01-22 09:00:19.595475: step: 328/77, loss: 0.00013391696847975254 2023-01-22 09:00:20.965627: step: 332/77, loss: 0.0010573953622952104 2023-01-22 09:00:22.241457: step: 336/77, loss: 0.0008024564012885094 2023-01-22 09:00:23.539453: step: 340/77, loss: 0.0025499192997813225 2023-01-22 09:00:24.894272: step: 344/77, loss: 0.006977582350373268 2023-01-22 09:00:26.245976: step: 348/77, loss: 5.410162884800229e-06 2023-01-22 09:00:27.571943: step: 352/77, loss: 0.025690611451864243 2023-01-22 09:00:28.918672: step: 356/77, loss: 0.0002882384869735688 2023-01-22 09:00:30.223867: step: 360/77, 
loss: 0.02115679532289505 2023-01-22 09:00:31.474642: step: 364/77, loss: 9.5250470621977e-05 2023-01-22 09:00:32.839205: step: 368/77, loss: 0.102179154753685 2023-01-22 09:00:34.181766: step: 372/77, loss: 0.01381751149892807 2023-01-22 09:00:35.455493: step: 376/77, loss: 0.00032086059218272567 2023-01-22 09:00:36.731233: step: 380/77, loss: 0.001289880950935185 2023-01-22 09:00:38.042647: step: 384/77, loss: 0.00010622338595567271 2023-01-22 09:00:39.412324: step: 388/77, loss: 0.02440432272851467 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Chinese: {'template': {'p': 0.9436619718309859, 'r': 0.5234375, 'f1': 0.6733668341708542}, 'slot': {'p': 0.5, 'r': 0.0173992673992674, 'f1': 0.033628318584070796}, 'combined': 0.022644194423444654, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Korean: {'template': {'p': 0.9436619718309859, 'r': 0.5234375, 'f1': 0.6733668341708542}, 'slot': {'p': 0.5135135135135135, 'r': 0.0173992673992674, 'f1': 0.03365810451727192}, 'combined': 0.02266425128298712, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Russian: {'template': {'p': 0.9436619718309859, 'r': 0.5234375, 'f1': 0.6733668341708542}, 'slot': {'p': 0.5, 'r': 0.0173992673992674, 'f1': 0.033628318584070796}, 'combined': 0.022644194423444654, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 
'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 21 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:02:26.109093: step: 4/77, loss: 0.022796493023633957 2023-01-22 09:02:27.414472: step: 8/77, loss: 0.0644044354557991 2023-01-22 09:02:28.684632: step: 12/77, loss: 0.026123739778995514 2023-01-22 09:02:29.994001: step: 16/77, loss: 0.0022905736695975065 2023-01-22 09:02:31.350998: step: 20/77, loss: 0.02266693487763405 2023-01-22 09:02:32.692614: step: 24/77, loss: 1.8297017959412187e-05 2023-01-22 09:02:34.001737: step: 28/77, loss: 5.624979530693963e-05 2023-01-22 09:02:35.339582: step: 32/77, loss: 0.029617756605148315 2023-01-22 09:02:36.716357: step: 36/77, loss: 9.667923222878017e-06 2023-01-22 09:02:38.036571: step: 40/77, loss: 0.21155443787574768 2023-01-22 09:02:39.348094: step: 44/77, loss: 0.0017198350979015231 2023-01-22 09:02:40.673991: step: 48/77, loss: 0.004426266066730022 2023-01-22 09:02:42.035005: step: 52/77, loss: 1.6712678188923746e-05 2023-01-22 09:02:43.384198: step: 56/77, loss: 0.0006967654335312545 2023-01-22 09:02:44.723034: step: 60/77, loss: 0.001149756950326264 2023-01-22 09:02:46.060378: step: 64/77, loss: 0.002224504482001066 2023-01-22 09:02:47.374283: step: 68/77, loss: 7.791274583723862e-06 2023-01-22 09:02:48.716109: step: 72/77, loss: 0.0003872505621984601 2023-01-22 09:02:50.107738: step: 76/77, loss: 0.0235122237354517 2023-01-22 09:02:51.429148: step: 80/77, loss: 0.01756272278726101 2023-01-22 09:02:52.767516: step: 84/77, loss: 0.011864123865962029 2023-01-22 09:02:54.050635: step: 88/77, loss: 0.0001767357753124088 2023-01-22 09:02:55.333218: step: 92/77, loss: 3.5476004995871335e-06 2023-01-22 09:02:56.625749: step: 96/77, loss: 9.594253060640767e-05 2023-01-22 09:02:57.992896: step: 100/77, loss: 0.0008208492654375732 2023-01-22 09:02:59.300911: step: 104/77, loss: 6.475725058407988e-06 2023-01-22 09:03:00.609845: step: 108/77, loss: 0.005691048689186573 2023-01-22 09:03:01.911293: step: 112/77, loss: 4.101833837921731e-06 2023-01-22 09:03:03.243309: step: 116/77, loss: 0.0011443725088611245 2023-01-22 09:03:04.564211: step: 120/77, loss: 8.79143897236645e-07 2023-01-22 09:03:05.881969: step: 124/77, loss: 0.000219722292968072 2023-01-22 09:03:07.184067: step: 128/77, loss: 0.0002379810030106455 2023-01-22 09:03:08.471226: step: 132/77, loss: 8.631691889604554e-05 2023-01-22 09:03:09.807139: step: 136/77, loss: 0.00938483141362667 2023-01-22 09:03:11.068719: step: 140/77, loss: 0.0001489740243414417 2023-01-22 09:03:12.477148: step: 144/77, loss: 1.5052801245474257e-05 2023-01-22 09:03:13.761482: step: 
148/77, loss: 0.026118695735931396 2023-01-22 09:03:15.048390: step: 152/77, loss: 0.07806071639060974 2023-01-22 09:03:16.408290: step: 156/77, loss: 5.0516373448772356e-05 2023-01-22 09:03:17.735178: step: 160/77, loss: 0.0023060047533363104 2023-01-22 09:03:19.082590: step: 164/77, loss: 0.00032793389982543886 2023-01-22 09:03:20.440937: step: 168/77, loss: 0.00041060629882849753 2023-01-22 09:03:21.730681: step: 172/77, loss: 3.9587655919604003e-05 2023-01-22 09:03:23.102315: step: 176/77, loss: 0.00928050372749567 2023-01-22 09:03:24.385285: step: 180/77, loss: 0.0007714069215580821 2023-01-22 09:03:25.672748: step: 184/77, loss: 0.0021703136153519154 2023-01-22 09:03:27.044146: step: 188/77, loss: 5.304771661940322e-07 2023-01-22 09:03:28.295878: step: 192/77, loss: 0.024455228820443153 2023-01-22 09:03:29.593564: step: 196/77, loss: 0.001490587368607521 2023-01-22 09:03:30.895910: step: 200/77, loss: 0.00020829432469327003 2023-01-22 09:03:32.201551: step: 204/77, loss: 0.00034719277755357325 2023-01-22 09:03:33.546384: step: 208/77, loss: 1.3317236152943224e-05 2023-01-22 09:03:34.840928: step: 212/77, loss: 9.691724699223414e-05 2023-01-22 09:03:36.190622: step: 216/77, loss: 0.23184849321842194 2023-01-22 09:03:37.430255: step: 220/77, loss: 0.00047657586401328444 2023-01-22 09:03:38.791284: step: 224/77, loss: 0.00017672436661086977 2023-01-22 09:03:40.140036: step: 228/77, loss: 0.0009486278286203742 2023-01-22 09:03:41.459708: step: 232/77, loss: 1.5350226021837443e-05 2023-01-22 09:03:42.785333: step: 236/77, loss: 0.00015230315329972655 2023-01-22 09:03:44.064799: step: 240/77, loss: 0.12049225717782974 2023-01-22 09:03:45.363992: step: 244/77, loss: 0.0015533717814832926 2023-01-22 09:03:46.719037: step: 248/77, loss: 0.00011515563528519124 2023-01-22 09:03:48.028024: step: 252/77, loss: 0.0006750301108695567 2023-01-22 09:03:49.275035: step: 256/77, loss: 0.0003167404211126268 2023-01-22 09:03:50.583216: step: 260/77, loss: 4.348805668996647e-05 2023-01-22 09:03:51.896261: step: 264/77, loss: 0.0006341171683743596 2023-01-22 09:03:53.188590: step: 268/77, loss: 0.00039305503014475107 2023-01-22 09:03:54.427590: step: 272/77, loss: 0.0063155763782560825 2023-01-22 09:03:55.718784: step: 276/77, loss: 0.015269141644239426 2023-01-22 09:03:57.065649: step: 280/77, loss: 0.000924046034924686 2023-01-22 09:03:58.367142: step: 284/77, loss: 0.0018953699618577957 2023-01-22 09:03:59.689043: step: 288/77, loss: 0.0002120360150001943 2023-01-22 09:04:01.025288: step: 292/77, loss: 0.009600703604519367 2023-01-22 09:04:02.309366: step: 296/77, loss: 0.006479513365775347 2023-01-22 09:04:03.567544: step: 300/77, loss: 0.0001296757982345298 2023-01-22 09:04:04.899310: step: 304/77, loss: 0.01025966927409172 2023-01-22 09:04:06.228118: step: 308/77, loss: 3.9645419747103006e-05 2023-01-22 09:04:07.538361: step: 312/77, loss: 8.368302951566875e-05 2023-01-22 09:04:08.875031: step: 316/77, loss: 0.07057785987854004 2023-01-22 09:04:10.219648: step: 320/77, loss: 9.552506526233628e-05 2023-01-22 09:04:11.495583: step: 324/77, loss: 0.027393445372581482 2023-01-22 09:04:12.797244: step: 328/77, loss: 0.05229645222425461 2023-01-22 09:04:14.104832: step: 332/77, loss: 3.1888174589767004e-07 2023-01-22 09:04:15.407031: step: 336/77, loss: 4.003276990260929e-05 2023-01-22 09:04:16.757613: step: 340/77, loss: 0.005392936524003744 2023-01-22 09:04:18.085700: step: 344/77, loss: 0.00012345942377578467 2023-01-22 09:04:19.413345: step: 348/77, loss: 0.00040279352106153965 2023-01-22 
09:04:20.750285: step: 352/77, loss: 0.000411268207244575 2023-01-22 09:04:22.078329: step: 356/77, loss: 0.0016155533958226442 2023-01-22 09:04:23.387193: step: 360/77, loss: 0.00015825206355657429 2023-01-22 09:04:24.728933: step: 364/77, loss: 0.030476603657007217 2023-01-22 09:04:25.974743: step: 368/77, loss: 0.0009514524135738611 2023-01-22 09:04:27.275172: step: 372/77, loss: 0.00010489935812074691 2023-01-22 09:04:28.585999: step: 376/77, loss: 1.1905699466296937e-06 2023-01-22 09:04:29.953012: step: 380/77, loss: 0.0012081509921699762 2023-01-22 09:04:31.257554: step: 384/77, loss: 0.030084820464253426 2023-01-22 09:04:32.563214: step: 388/77, loss: 0.01937655545771122 ================================================== Loss: 0.013 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Chinese: {'template': {'p': 0.9705882352941176, 'r': 0.515625, 'f1': 0.673469387755102}, 'slot': {'p': 0.53125, 'r': 0.015567765567765568, 'f1': 0.0302491103202847}, 'combined': 0.020371849807538676, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Korean: {'template': {'p': 0.9558823529411765, 'r': 0.5078125, 'f1': 0.6632653061224489}, 'slot': {'p': 0.5151515151515151, 'r': 0.015567765567765568, 'f1': 0.030222222222222227}, 'combined': 0.020045351473922904, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Russian: {'template': {'p': 0.9714285714285714, 'r': 0.53125, 'f1': 0.6868686868686867}, 'slot': {'p': 0.5161290322580645, 'r': 0.014652014652014652, 'f1': 0.028495102404274268}, 'combined': 0.019572393570612624, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 
'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 22 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:06:19.112397: step: 4/77, loss: 0.0034078971948474646 2023-01-22 09:06:20.402074: step: 8/77, loss: 0.017262566834688187 2023-01-22 09:06:21.761409: step: 12/77, loss: 0.00021428756008390337 2023-01-22 09:06:23.102614: step: 16/77, loss: 0.013926129788160324 2023-01-22 09:06:24.419815: step: 20/77, loss: 1.1324713113936014e-06 2023-01-22 09:06:25.725305: step: 24/77, loss: 0.00899231806397438 2023-01-22 09:06:27.061627: step: 28/77, loss: 5.312186112860218e-05 2023-01-22 09:06:28.390694: step: 32/77, loss: 0.000548401556443423 2023-01-22 09:06:29.682470: step: 36/77, loss: 2.640987622726243e-05 2023-01-22 09:06:30.969564: step: 40/77, loss: 0.0005899526877328753 2023-01-22 09:06:32.280577: step: 44/77, loss: 0.04716620221734047 2023-01-22 09:06:33.588456: step: 48/77, loss: 8.14858649391681e-05 2023-01-22 09:06:34.877451: step: 52/77, loss: 2.397433718215325e-06 2023-01-22 09:06:36.192712: step: 56/77, loss: 0.03809444233775139 2023-01-22 09:06:37.523302: step: 60/77, loss: 4.114320836379193e-05 2023-01-22 09:06:38.785733: step: 64/77, loss: 0.0005176113336347044 2023-01-22 09:06:40.043807: step: 68/77, loss: 0.00018408146570436656 2023-01-22 09:06:41.363810: step: 72/77, loss: 0.0005031367181800306 2023-01-22 09:06:42.635922: step: 76/77, loss: 0.00011992393410764635 2023-01-22 09:06:43.892319: step: 80/77, loss: 0.08048339188098907 2023-01-22 09:06:45.171927: step: 84/77, loss: 0.0002451002364978194 2023-01-22 09:06:46.471997: step: 88/77, loss: 0.008995145559310913 2023-01-22 09:06:47.733780: step: 92/77, loss: 0.0007470811833627522 2023-01-22 09:06:49.058000: step: 96/77, loss: 0.00022808456560596824 2023-01-22 09:06:50.394973: step: 100/77, loss: 2.975380266434513e-05 2023-01-22 09:06:51.745224: step: 104/77, loss: 6.816323002567515e-05 2023-01-22 09:06:53.081007: step: 108/77, loss: 5.16979162057396e-05 2023-01-22 09:06:54.362428: step: 112/77, loss: 3.284345075371675e-05 2023-01-22 09:06:55.661575: step: 116/77, loss: 0.01112334430217743 2023-01-22 09:06:57.007924: step: 120/77, loss: 0.025925541296601295 2023-01-22 09:06:58.305128: step: 124/77, loss: 6.150685658212751e-05 2023-01-22 09:06:59.635638: step: 128/77, loss: 0.0017336109885945916 2023-01-22 09:07:00.974004: step: 132/77, loss: 0.012070889584720135 2023-01-22 09:07:02.319593: step: 136/77, loss: 0.032958123832941055 2023-01-22 09:07:03.607033: step: 
140/77, loss: 0.04690272733569145 2023-01-22 09:07:04.931310: step: 144/77, loss: 6.709252375003416e-06 2023-01-22 09:07:06.263330: step: 148/77, loss: 0.0639430582523346 2023-01-22 09:07:07.586803: step: 152/77, loss: 0.0006111941183917224 2023-01-22 09:07:08.882131: step: 156/77, loss: 0.0046700369566679 2023-01-22 09:07:10.260303: step: 160/77, loss: 0.0025426128413528204 2023-01-22 09:07:11.594714: step: 164/77, loss: 0.0014713435666635633 2023-01-22 09:07:12.885337: step: 168/77, loss: 5.589408829109743e-05 2023-01-22 09:07:14.204035: step: 172/77, loss: 0.016112398356199265 2023-01-22 09:07:15.490392: step: 176/77, loss: 0.013365473598241806 2023-01-22 09:07:16.770188: step: 180/77, loss: 0.0001512926974100992 2023-01-22 09:07:18.073372: step: 184/77, loss: 0.009979977272450924 2023-01-22 09:07:19.371501: step: 188/77, loss: 0.6408872604370117 2023-01-22 09:07:20.723431: step: 192/77, loss: 8.001852620509453e-07 2023-01-22 09:07:22.054813: step: 196/77, loss: 2.3612466975464486e-05 2023-01-22 09:07:23.402051: step: 200/77, loss: 0.002872622571885586 2023-01-22 09:07:24.745771: step: 204/77, loss: 0.00032813759753480554 2023-01-22 09:07:26.041487: step: 208/77, loss: 0.017089366912841797 2023-01-22 09:07:27.360792: step: 212/77, loss: 0.00020115751249250025 2023-01-22 09:07:28.688431: step: 216/77, loss: 0.00011558117694221437 2023-01-22 09:07:29.989358: step: 220/77, loss: 0.0005581318982876837 2023-01-22 09:07:31.268643: step: 224/77, loss: 0.005376110784709454 2023-01-22 09:07:32.589176: step: 228/77, loss: 2.5518758775433525e-05 2023-01-22 09:07:33.930243: step: 232/77, loss: 0.0942663624882698 2023-01-22 09:07:35.192890: step: 236/77, loss: 6.049809826436103e-07 2023-01-22 09:07:36.456465: step: 240/77, loss: 0.0003028716309927404 2023-01-22 09:07:37.827680: step: 244/77, loss: 0.000771807215642184 2023-01-22 09:07:39.140205: step: 248/77, loss: 0.0033995232079178095 2023-01-22 09:07:40.446935: step: 252/77, loss: 0.0004318088758736849 2023-01-22 09:07:41.759907: step: 256/77, loss: 0.00012441341823432595 2023-01-22 09:07:43.145337: step: 260/77, loss: 0.0021375638898462057 2023-01-22 09:07:44.427798: step: 264/77, loss: 0.002261765766888857 2023-01-22 09:07:45.723734: step: 268/77, loss: 1.0147630746359937e-05 2023-01-22 09:07:47.073305: step: 272/77, loss: 6.760523683624342e-05 2023-01-22 09:07:48.413144: step: 276/77, loss: 0.0008380015497095883 2023-01-22 09:07:49.752838: step: 280/77, loss: 0.015756692737340927 2023-01-22 09:07:51.066967: step: 284/77, loss: 0.00037149080890230834 2023-01-22 09:07:52.360286: step: 288/77, loss: 0.0002747896360233426 2023-01-22 09:07:53.675165: step: 292/77, loss: 0.00038542161928489804 2023-01-22 09:07:55.033615: step: 296/77, loss: 0.0015570932300761342 2023-01-22 09:07:56.343167: step: 300/77, loss: 0.05393500253558159 2023-01-22 09:07:57.679446: step: 304/77, loss: 5.701546251657419e-05 2023-01-22 09:07:59.023072: step: 308/77, loss: 7.498095510527492e-05 2023-01-22 09:08:00.363821: step: 312/77, loss: 0.01547918003052473 2023-01-22 09:08:01.642855: step: 316/77, loss: 0.00011507688031997532 2023-01-22 09:08:02.965791: step: 320/77, loss: 1.5538498701062053e-05 2023-01-22 09:08:04.250209: step: 324/77, loss: 0.0026777207385748625 2023-01-22 09:08:05.552546: step: 328/77, loss: 0.009971034713089466 2023-01-22 09:08:06.879816: step: 332/77, loss: 4.887807517661713e-05 2023-01-22 09:08:08.198560: step: 336/77, loss: 0.05552070215344429 2023-01-22 09:08:09.534553: step: 340/77, loss: 0.00015315160271711648 2023-01-22 09:08:10.853661: step: 
344/77, loss: 0.0002867273869924247 2023-01-22 09:08:12.185797: step: 348/77, loss: 0.0023228887002915144 2023-01-22 09:08:13.554109: step: 352/77, loss: 6.802859388699289e-06 2023-01-22 09:08:14.849934: step: 356/77, loss: 0.007060936186462641 2023-01-22 09:08:16.191467: step: 360/77, loss: 0.02428770251572132 2023-01-22 09:08:17.589305: step: 364/77, loss: 0.0007483771769329906 2023-01-22 09:08:18.875977: step: 368/77, loss: 0.03832714259624481 2023-01-22 09:08:20.134956: step: 372/77, loss: 0.00018787103181239218 2023-01-22 09:08:21.443678: step: 376/77, loss: 0.0006022527231834829 2023-01-22 09:08:22.748184: step: 380/77, loss: 0.0023554773069918156 2023-01-22 09:08:24.062645: step: 384/77, loss: 0.005550259258598089 2023-01-22 09:08:25.343759: step: 388/77, loss: 6.707603461109102e-06 ================================================== Loss: 0.015 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 22} Test Chinese: {'template': {'p': 0.9836065573770492, 'r': 0.46875, 'f1': 0.6349206349206349}, 'slot': {'p': 0.6521739130434783, 'r': 0.013736263736263736, 'f1': 0.026905829596412557}, 'combined': 0.01708306641042067, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 22} Test Korean: {'template': {'p': 0.9833333333333333, 'r': 0.4609375, 'f1': 0.6276595744680851}, 'slot': {'p': 0.6818181818181818, 'r': 0.013736263736263736, 'f1': 0.026929982046678635}, 'combined': 0.01690286107185148, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 22} Test Russian: {'template': {'p': 0.9833333333333333, 'r': 0.4609375, 'f1': 0.6276595744680851}, 'slot': {'p': 0.6818181818181818, 'r': 0.013736263736263736, 'f1': 0.026929982046678635}, 'combined': 0.01690286107185148, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 
'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 23 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:10:11.954893: step: 4/77, loss: 0.02925044298171997 2023-01-22 09:10:13.286988: step: 8/77, loss: 0.0030780029483139515 2023-01-22 09:10:14.598833: step: 12/77, loss: 0.030284637585282326 2023-01-22 09:10:15.906589: step: 16/77, loss: 0.00010271882638335228 2023-01-22 09:10:17.208331: step: 20/77, loss: 0.0010229551699012518 2023-01-22 09:10:18.528086: step: 24/77, loss: 0.0057197012938559055 2023-01-22 09:10:19.803116: step: 28/77, loss: 0.0016985785914584994 2023-01-22 09:10:21.178757: step: 32/77, loss: 0.017662620171904564 2023-01-22 09:10:22.440769: step: 36/77, loss: 0.0006728796288371086 2023-01-22 09:10:23.729414: step: 40/77, loss: 2.5985159481933806e-06 2023-01-22 09:10:25.014112: step: 44/77, loss: 0.00010429038957227021 2023-01-22 09:10:26.328962: step: 48/77, loss: 0.04060396924614906 2023-01-22 09:10:27.653353: step: 52/77, loss: 0.012450166046619415 2023-01-22 09:10:28.886474: step: 56/77, loss: 0.005256613250821829 2023-01-22 09:10:30.211886: step: 60/77, loss: 1.236334992427146e-05 2023-01-22 09:10:31.558284: step: 64/77, loss: 2.8647409635595977e-05 2023-01-22 09:10:32.942283: step: 68/77, loss: 4.4482527300715446e-05 2023-01-22 09:10:34.222747: step: 72/77, loss: 0.02085484005510807 2023-01-22 09:10:35.533455: step: 76/77, loss: 2.1111116438987665e-05 2023-01-22 09:10:36.907951: step: 80/77, loss: 0.0007840099860914052 2023-01-22 09:10:38.191316: step: 84/77, loss: 0.00127530621830374 2023-01-22 09:10:39.488993: step: 88/77, loss: 3.6371507121657487e-06 2023-01-22 09:10:40.823102: step: 92/77, loss: 0.008171175606548786 2023-01-22 09:10:42.180204: step: 96/77, loss: 0.02187040075659752 2023-01-22 09:10:43.510043: step: 100/77, loss: 0.008003543131053448 2023-01-22 09:10:44.796388: step: 104/77, loss: 0.004421391524374485 2023-01-22 09:10:46.120334: step: 108/77, loss: 4.918003469356336e-05 2023-01-22 09:10:47.389621: step: 112/77, loss: 0.011352593079209328 2023-01-22 09:10:48.756830: step: 116/77, loss: 0.001593520981259644 2023-01-22 09:10:50.057519: step: 120/77, loss: 0.031238067895174026 2023-01-22 09:10:51.379375: step: 124/77, loss: 0.01812879368662834 2023-01-22 09:10:52.686521: step: 128/77, loss: 0.0009220782667398453 2023-01-22 09:10:54.030283: step: 132/77, loss: 0.023734448477625847 2023-01-22 
09:10:55.362187: step: 136/77, loss: 1.326091296505183e-05 2023-01-22 09:10:56.665010: step: 140/77, loss: 0.004073227755725384 2023-01-22 09:10:57.947461: step: 144/77, loss: 0.0015277594793587923 2023-01-22 09:10:59.280990: step: 148/77, loss: 0.00916389748454094 2023-01-22 09:11:00.619518: step: 152/77, loss: 0.0002302402281202376 2023-01-22 09:11:01.911570: step: 156/77, loss: 0.00012056231935275719 2023-01-22 09:11:03.215671: step: 160/77, loss: 0.0014544213190674782 2023-01-22 09:11:04.577251: step: 164/77, loss: 0.00019257509848102927 2023-01-22 09:11:05.896123: step: 168/77, loss: 3.402936636121012e-05 2023-01-22 09:11:07.244007: step: 172/77, loss: 1.1194384569535032e-05 2023-01-22 09:11:08.590249: step: 176/77, loss: 0.029281653463840485 2023-01-22 09:11:09.922393: step: 180/77, loss: 0.05051398277282715 2023-01-22 09:11:11.230790: step: 184/77, loss: 2.362795567023568e-05 2023-01-22 09:11:12.537162: step: 188/77, loss: 0.00014806289982516319 2023-01-22 09:11:13.813281: step: 192/77, loss: 1.3202284208091442e-06 2023-01-22 09:11:15.133270: step: 196/77, loss: 5.061740739620291e-05 2023-01-22 09:11:16.461916: step: 200/77, loss: 0.009350545704364777 2023-01-22 09:11:17.760194: step: 204/77, loss: 0.014052278362214565 2023-01-22 09:11:19.100937: step: 208/77, loss: 8.189300569938496e-05 2023-01-22 09:11:20.447930: step: 212/77, loss: 0.007922136224806309 2023-01-22 09:11:21.850740: step: 216/77, loss: 8.792057997197844e-06 2023-01-22 09:11:23.174519: step: 220/77, loss: 0.0014992207288742065 2023-01-22 09:11:24.496311: step: 224/77, loss: 0.033945001661777496 2023-01-22 09:11:25.848788: step: 228/77, loss: 9.25993881537579e-05 2023-01-22 09:11:27.205317: step: 232/77, loss: 0.00026498493389226496 2023-01-22 09:11:28.517072: step: 236/77, loss: 2.707368139454047e-06 2023-01-22 09:11:29.893622: step: 240/77, loss: 0.02109416015446186 2023-01-22 09:11:31.186666: step: 244/77, loss: 0.007108455523848534 2023-01-22 09:11:32.495669: step: 248/77, loss: 0.0002526230236981064 2023-01-22 09:11:33.787452: step: 252/77, loss: 0.00032294943230226636 2023-01-22 09:11:35.091556: step: 256/77, loss: 0.0007726695039309561 2023-01-22 09:11:36.470866: step: 260/77, loss: 9.100021270569414e-05 2023-01-22 09:11:37.775656: step: 264/77, loss: 0.07174643129110336 2023-01-22 09:11:39.075954: step: 268/77, loss: 6.0867492720717564e-05 2023-01-22 09:11:40.401356: step: 272/77, loss: 0.0005447212024591863 2023-01-22 09:11:41.743543: step: 276/77, loss: 0.0001453039440093562 2023-01-22 09:11:43.103433: step: 280/77, loss: 0.00035742539330385625 2023-01-22 09:11:44.443327: step: 284/77, loss: 0.03960578143596649 2023-01-22 09:11:45.785611: step: 288/77, loss: 0.00041772908298298717 2023-01-22 09:11:47.094270: step: 292/77, loss: 2.4745031623751856e-05 2023-01-22 09:11:48.415795: step: 296/77, loss: 0.015246432274580002 2023-01-22 09:11:49.728342: step: 300/77, loss: 0.0002701383491512388 2023-01-22 09:11:51.018163: step: 304/77, loss: 0.000890142924617976 2023-01-22 09:11:52.334331: step: 308/77, loss: 0.0024266818072646856 2023-01-22 09:11:53.641889: step: 312/77, loss: 0.0005810699076391757 2023-01-22 09:11:54.892940: step: 316/77, loss: 8.0965746747097e-06 2023-01-22 09:11:56.229941: step: 320/77, loss: 0.0001801561884349212 2023-01-22 09:11:57.542880: step: 324/77, loss: 0.0010214989306405187 2023-01-22 09:11:58.810152: step: 328/77, loss: 6.109410719545849e-07 2023-01-22 09:12:00.153161: step: 332/77, loss: 0.03397783264517784 2023-01-22 09:12:01.490596: step: 336/77, loss: 4.2231633415212855e-05 
2023-01-22 09:12:02.799315: step: 340/77, loss: 0.000951576919760555 2023-01-22 09:12:04.090571: step: 344/77, loss: 7.280572754098102e-05 2023-01-22 09:12:05.427379: step: 348/77, loss: 0.001599346986040473 2023-01-22 09:12:06.750089: step: 352/77, loss: 0.02990417554974556 2023-01-22 09:12:08.075931: step: 356/77, loss: 0.0002445604186505079 2023-01-22 09:12:09.461820: step: 360/77, loss: 0.1017698273062706 2023-01-22 09:12:10.804929: step: 364/77, loss: 0.024318577721714973 2023-01-22 09:12:12.152770: step: 368/77, loss: 5.249897367320955e-05 2023-01-22 09:12:13.504911: step: 372/77, loss: 0.0047577316872775555 2023-01-22 09:12:14.799708: step: 376/77, loss: 0.0014664152404293418 2023-01-22 09:12:16.160739: step: 380/77, loss: 0.00015006517060101032 2023-01-22 09:12:17.481497: step: 384/77, loss: 4.29098290624097e-05 2023-01-22 09:12:18.837340: step: 388/77, loss: 3.932953404728323e-05 ================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 0.9705882352941176, 'r': 0.55, 'f1': 0.7021276595744681}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.047056174715749195, 'epoch': 23} Test Chinese: {'template': {'p': 0.9577464788732394, 'r': 0.53125, 'f1': 0.6834170854271355}, 'slot': {'p': 0.4722222222222222, 'r': 0.015567765567765568, 'f1': 0.030141843971631208}, 'combined': 0.020599451156491675, 'epoch': 23} Dev Korean: {'template': {'p': 0.9705882352941176, 'r': 0.55, 'f1': 0.7021276595744681}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.047056174715749195, 'epoch': 23} Test Korean: {'template': {'p': 0.9577464788732394, 'r': 0.53125, 'f1': 0.6834170854271355}, 'slot': {'p': 0.4722222222222222, 'r': 0.015567765567765568, 'f1': 0.030141843971631208}, 'combined': 0.020599451156491675, 'epoch': 23} Dev Russian: {'template': {'p': 0.9705882352941176, 'r': 0.55, 'f1': 0.7021276595744681}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.047056174715749195, 'epoch': 23} Test Russian: {'template': {'p': 0.9583333333333334, 'r': 0.5390625, 'f1': 0.69}, 'slot': {'p': 0.4722222222222222, 'r': 0.015567765567765568, 'f1': 0.030141843971631208}, 'combined': 0.02079787234042553, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 
'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 24 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:14:05.649900: step: 4/77, loss: 0.00027425645384937525 2023-01-22 09:14:06.959141: step: 8/77, loss: 0.00812295451760292 2023-01-22 09:14:08.255303: step: 12/77, loss: 0.00017981577548198402 2023-01-22 09:14:09.575754: step: 16/77, loss: 2.915915956691606e-06 2023-01-22 09:14:10.889898: step: 20/77, loss: 0.015080899000167847 2023-01-22 09:14:12.198783: step: 24/77, loss: 0.00021564430790022016 2023-01-22 09:14:13.523948: step: 28/77, loss: 0.013282394967973232 2023-01-22 09:14:14.861543: step: 32/77, loss: 0.00043583771912381053 2023-01-22 09:14:16.169631: step: 36/77, loss: 0.0005709958495572209 2023-01-22 09:14:17.436309: step: 40/77, loss: 0.012487326748669147 2023-01-22 09:14:18.731174: step: 44/77, loss: 0.0021974733099341393 2023-01-22 09:14:20.063562: step: 48/77, loss: 0.0002029470488196239 2023-01-22 09:14:21.366571: step: 52/77, loss: 0.0013817045837640762 2023-01-22 09:14:22.680512: step: 56/77, loss: 0.0020415547769516706 2023-01-22 09:14:23.970883: step: 60/77, loss: 0.0067010316997766495 2023-01-22 09:14:25.327931: step: 64/77, loss: 0.00014627687050960958 2023-01-22 09:14:26.658524: step: 68/77, loss: 3.250454756198451e-05 2023-01-22 09:14:27.987117: step: 72/77, loss: 0.011801852844655514 2023-01-22 09:14:29.287506: step: 76/77, loss: 7.098000423866324e-06 2023-01-22 09:14:30.574742: step: 80/77, loss: 0.002970988629385829 2023-01-22 09:14:31.835130: step: 84/77, loss: 0.0003883733879774809 2023-01-22 09:14:33.194113: step: 88/77, loss: 0.004043085966259241 2023-01-22 09:14:34.478568: step: 92/77, loss: 4.335409903433174e-05 2023-01-22 09:14:35.730914: step: 96/77, loss: 0.0009157421300187707 2023-01-22 09:14:37.061748: step: 100/77, loss: 0.00040233746403828263 2023-01-22 09:14:38.369403: step: 104/77, loss: 0.007378537207841873 2023-01-22 09:14:39.705153: step: 108/77, loss: 0.0011341262143105268 2023-01-22 09:14:41.079241: step: 112/77, loss: 0.005580637603998184 2023-01-22 09:14:42.458333: step: 116/77, loss: 0.0007130467565730214 2023-01-22 09:14:43.765049: step: 120/77, loss: 0.0002832773025147617 2023-01-22 09:14:45.076010: step: 124/77, loss: 0.06424663960933685 2023-01-22 
09:14:46.435925: step: 128/77, loss: 1.1567653928068466e-05 2023-01-22 09:14:47.805365: step: 132/77, loss: 0.12748531997203827 2023-01-22 09:14:49.085742: step: 136/77, loss: 0.0014793593436479568 2023-01-22 09:14:50.404271: step: 140/77, loss: 0.035108745098114014 2023-01-22 09:14:51.771632: step: 144/77, loss: 0.029069863259792328 2023-01-22 09:14:53.121571: step: 148/77, loss: 0.0004990094457753003 2023-01-22 09:14:54.491946: step: 152/77, loss: 0.0552806556224823 2023-01-22 09:14:55.859186: step: 156/77, loss: 5.599405994871631e-05 2023-01-22 09:14:57.117347: step: 160/77, loss: 0.0010671545751392841 2023-01-22 09:14:58.427216: step: 164/77, loss: 9.503433102509007e-05 2023-01-22 09:14:59.719211: step: 168/77, loss: 0.00013206909352447838 2023-01-22 09:15:01.064607: step: 172/77, loss: 0.00036414500209502876 2023-01-22 09:15:02.365749: step: 176/77, loss: 0.000315420504193753 2023-01-22 09:15:03.697357: step: 180/77, loss: 0.010863608680665493 2023-01-22 09:15:05.064095: step: 184/77, loss: 0.0009430536883883178 2023-01-22 09:15:06.394706: step: 188/77, loss: 5.8920064475387335e-05 2023-01-22 09:15:07.720800: step: 192/77, loss: 0.0001401538320351392 2023-01-22 09:15:09.025326: step: 196/77, loss: 0.0005722360219806433 2023-01-22 09:15:10.333067: step: 200/77, loss: 0.0019929315894842148 2023-01-22 09:15:11.622557: step: 204/77, loss: 9.994933861889876e-06 2023-01-22 09:15:12.959750: step: 208/77, loss: 0.0018155412981286645 2023-01-22 09:15:14.250496: step: 212/77, loss: 0.0002928571484517306 2023-01-22 09:15:15.561015: step: 216/77, loss: 0.0019531085854396224 2023-01-22 09:15:16.864127: step: 220/77, loss: 0.0004826158401556313 2023-01-22 09:15:18.174169: step: 224/77, loss: 0.0001760215818649158 2023-01-22 09:15:19.452876: step: 228/77, loss: 0.0014431884046643972 2023-01-22 09:15:20.761857: step: 232/77, loss: 1.1321411875542253e-05 2023-01-22 09:15:22.090409: step: 236/77, loss: 2.3370621420326643e-05 2023-01-22 09:15:23.446563: step: 240/77, loss: 0.026028109714388847 2023-01-22 09:15:24.833035: step: 244/77, loss: 0.028926406055688858 2023-01-22 09:15:26.142945: step: 248/77, loss: 0.00455608032643795 2023-01-22 09:15:27.407468: step: 252/77, loss: 5.085330121801235e-05 2023-01-22 09:15:28.725792: step: 256/77, loss: 0.0015485123731195927 2023-01-22 09:15:29.998472: step: 260/77, loss: 0.0001962407404789701 2023-01-22 09:15:31.322492: step: 264/77, loss: 0.00013164323172532022 2023-01-22 09:15:32.616772: step: 268/77, loss: 0.015872662886977196 2023-01-22 09:15:33.945380: step: 272/77, loss: 0.004312410019338131 2023-01-22 09:15:35.292732: step: 276/77, loss: 0.000288039242150262 2023-01-22 09:15:36.625257: step: 280/77, loss: 0.0019080275669693947 2023-01-22 09:15:37.946653: step: 284/77, loss: 0.004176371265202761 2023-01-22 09:15:39.284205: step: 288/77, loss: 0.002108911285176873 2023-01-22 09:15:40.647223: step: 292/77, loss: 0.0026260290760546923 2023-01-22 09:15:42.025472: step: 296/77, loss: 0.00010112720337929204 2023-01-22 09:15:43.367394: step: 300/77, loss: 0.014249087311327457 2023-01-22 09:15:44.724985: step: 304/77, loss: 0.014059971086680889 2023-01-22 09:15:46.034198: step: 308/77, loss: 0.001644664560444653 2023-01-22 09:15:47.345043: step: 312/77, loss: 1.3291371487866854e-06 2023-01-22 09:15:48.674504: step: 316/77, loss: 0.03550218790769577 2023-01-22 09:15:50.055430: step: 320/77, loss: 0.00010493921581655741 2023-01-22 09:15:51.380066: step: 324/77, loss: 8.642347529530525e-05 2023-01-22 09:15:52.726832: step: 328/77, loss: 6.938765181985218e-06 
2023-01-22 09:15:54.011687: step: 332/77, loss: 1.317476107942639e-05 2023-01-22 09:15:55.362256: step: 336/77, loss: 0.05840739607810974 2023-01-22 09:15:56.702435: step: 340/77, loss: 0.0179511196911335 2023-01-22 09:15:58.026035: step: 344/77, loss: 0.00040283938869833946 2023-01-22 09:15:59.333112: step: 348/77, loss: 4.836374046135461e-06 2023-01-22 09:16:00.650535: step: 352/77, loss: 8.54506652103737e-06 2023-01-22 09:16:01.970311: step: 356/77, loss: 0.0029704140033572912 2023-01-22 09:16:03.299197: step: 360/77, loss: 3.103607696175459e-06 2023-01-22 09:16:04.587656: step: 364/77, loss: 0.0021579840686172247 2023-01-22 09:16:05.893535: step: 368/77, loss: 1.236791860037556e-07 2023-01-22 09:16:07.198060: step: 372/77, loss: 2.22967064473778e-05 2023-01-22 09:16:08.499526: step: 376/77, loss: 4.1783634515013546e-05 2023-01-22 09:16:09.886670: step: 380/77, loss: 7.955438377393875e-06 2023-01-22 09:16:11.197897: step: 384/77, loss: 0.04268309473991394 2023-01-22 09:16:12.499000: step: 388/77, loss: 0.005105479154735804 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Chinese: {'template': {'p': 0.9838709677419355, 'r': 0.4765625, 'f1': 0.6421052631578947}, 'slot': {'p': 0.5714285714285714, 'r': 0.014652014652014652, 'f1': 0.02857142857142857}, 'combined': 0.018345864661654134, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Korean: {'template': {'p': 0.9682539682539683, 'r': 0.4765625, 'f1': 0.6387434554973821}, 'slot': {'p': 0.5517241379310345, 'r': 0.014652014652014652, 'f1': 0.028545941123996433}, 'combined': 0.018233533073966305, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Russian: {'template': {'p': 0.9838709677419355, 'r': 0.4765625, 'f1': 0.6421052631578947}, 'slot': {'p': 0.5714285714285714, 'r': 0.014652014652014652, 'f1': 0.02857142857142857}, 'combined': 0.018345864661654134, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 24} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 
0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 25 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:18:00.051926: step: 4/77, loss: 0.00018349630408920348 2023-01-22 09:18:01.411152: step: 8/77, loss: 0.0008007582509890199 2023-01-22 09:18:02.704328: step: 12/77, loss: 0.00010514700261410326 2023-01-22 09:18:03.974917: step: 16/77, loss: 1.3514750207832549e-05 2023-01-22 09:18:05.282458: step: 20/77, loss: 4.604392529472534e-07 2023-01-22 09:18:06.613638: step: 24/77, loss: 0.001322218682616949 2023-01-22 09:18:07.924231: step: 28/77, loss: 3.4719457175924617e-07 2023-01-22 09:18:09.227497: step: 32/77, loss: 4.3251011447864585e-06 2023-01-22 09:18:10.519334: step: 36/77, loss: 0.02783278562128544 2023-01-22 09:18:11.881038: step: 40/77, loss: 0.0031175236217677593 2023-01-22 09:18:13.200591: step: 44/77, loss: 2.2962005914450856e-06 2023-01-22 09:18:14.507092: step: 48/77, loss: 0.011918090283870697 2023-01-22 09:18:15.855395: step: 52/77, loss: 0.00037522020284086466 2023-01-22 09:18:17.195839: step: 56/77, loss: 4.917350224786787e-07 2023-01-22 09:18:18.504272: step: 60/77, loss: 6.5202999394387e-05 2023-01-22 09:18:19.844155: step: 64/77, loss: 0.0005991798243485391 2023-01-22 09:18:21.153709: step: 68/77, loss: 9.144550858763978e-06 2023-01-22 09:18:22.504595: step: 72/77, loss: 0.00012539334420580417 2023-01-22 09:18:23.820122: step: 76/77, loss: 4.0023842302616686e-05 2023-01-22 09:18:25.118359: step: 80/77, loss: 7.099514732544776e-06 2023-01-22 09:18:26.435966: step: 84/77, loss: 0.0011885878629982471 2023-01-22 09:18:27.711008: step: 88/77, loss: 5.400500958785415e-05 2023-01-22 09:18:29.016418: step: 92/77, loss: 0.00023997691459953785 2023-01-22 09:18:30.336111: step: 96/77, loss: 0.00022390282538253814 2023-01-22 09:18:31.669333: step: 100/77, loss: 2.1500749426195398e-05 2023-01-22 09:18:33.011754: step: 104/77, loss: 0.0019608705770224333 2023-01-22 09:18:34.344613: step: 108/77, loss: 9.088594197237398e-06 2023-01-22 09:18:35.675764: step: 112/77, loss: 
6.883950845804065e-05 2023-01-22 09:18:37.000768: step: 116/77, loss: 0.017398864030838013 2023-01-22 09:18:38.310441: step: 120/77, loss: 0.00023948725720401853 2023-01-22 09:18:39.578099: step: 124/77, loss: 2.577894235855638e-07 2023-01-22 09:18:40.856654: step: 128/77, loss: 0.0002802223898470402 2023-01-22 09:18:42.208480: step: 132/77, loss: 0.00019985133258160204 2023-01-22 09:18:43.538431: step: 136/77, loss: 0.00013735542597714812 2023-01-22 09:18:44.855936: step: 140/77, loss: 0.0003241307276766747 2023-01-22 09:18:46.150994: step: 144/77, loss: 0.03896763175725937 2023-01-22 09:18:47.412303: step: 148/77, loss: 3.923150870832615e-06 2023-01-22 09:18:48.724608: step: 152/77, loss: 7.137002830859274e-05 2023-01-22 09:18:50.037449: step: 156/77, loss: 2.2531385184265673e-05 2023-01-22 09:18:51.346679: step: 160/77, loss: 0.00044442887883633375 2023-01-22 09:18:52.651660: step: 164/77, loss: 0.00019603893451858312 2023-01-22 09:18:53.952676: step: 168/77, loss: 0.013213117606937885 2023-01-22 09:18:55.291554: step: 172/77, loss: 0.0005997586413286626 2023-01-22 09:18:56.625648: step: 176/77, loss: 0.010040149092674255 2023-01-22 09:18:57.913883: step: 180/77, loss: 0.002243348164483905 2023-01-22 09:18:59.260891: step: 184/77, loss: 0.0003383896255400032 2023-01-22 09:19:00.556157: step: 188/77, loss: 0.00788046233355999 2023-01-22 09:19:01.863951: step: 192/77, loss: 2.4015640519792214e-05 2023-01-22 09:19:03.187849: step: 196/77, loss: 0.00046700090751983225 2023-01-22 09:19:04.494802: step: 200/77, loss: 3.809694680967368e-05 2023-01-22 09:19:05.783845: step: 204/77, loss: 5.260309626464732e-05 2023-01-22 09:19:07.053880: step: 208/77, loss: 1.1607809256020118e-06 2023-01-22 09:19:08.355178: step: 212/77, loss: 7.758028004900552e-06 2023-01-22 09:19:09.648483: step: 216/77, loss: 4.928519774693996e-05 2023-01-22 09:19:10.963055: step: 220/77, loss: 0.004321509972214699 2023-01-22 09:19:12.300277: step: 224/77, loss: 0.0005570852081291378 2023-01-22 09:19:13.622925: step: 228/77, loss: 0.0009109312086366117 2023-01-22 09:19:14.957672: step: 232/77, loss: 9.968431550078094e-05 2023-01-22 09:19:16.297796: step: 236/77, loss: 6.817249959567562e-05 2023-01-22 09:19:17.599624: step: 240/77, loss: 5.587870077761181e-07 2023-01-22 09:19:18.958982: step: 244/77, loss: 0.004906816873699427 2023-01-22 09:19:20.253932: step: 248/77, loss: 5.3022653446532786e-05 2023-01-22 09:19:21.562721: step: 252/77, loss: 0.075285904109478 2023-01-22 09:19:22.862057: step: 256/77, loss: 0.0018372680060565472 2023-01-22 09:19:24.249402: step: 260/77, loss: 7.74833097239025e-05 2023-01-22 09:19:25.590887: step: 264/77, loss: 0.00038714701076969504 2023-01-22 09:19:26.962234: step: 268/77, loss: 0.0013665605802088976 2023-01-22 09:19:28.280330: step: 272/77, loss: 0.003437809646129608 2023-01-22 09:19:29.631192: step: 276/77, loss: 0.008186004124581814 2023-01-22 09:19:30.955317: step: 280/77, loss: 0.0013224020367488265 2023-01-22 09:19:32.261288: step: 284/77, loss: 0.0003919194859918207 2023-01-22 09:19:33.603082: step: 288/77, loss: 0.002622979925945401 2023-01-22 09:19:34.913552: step: 292/77, loss: 1.1907065527339e-05 2023-01-22 09:19:36.186275: step: 296/77, loss: 1.3178715562389698e-05 2023-01-22 09:19:37.445578: step: 300/77, loss: 6.871306595712667e-06 2023-01-22 09:19:38.739616: step: 304/77, loss: 1.4133414879324846e-05 2023-01-22 09:19:40.060045: step: 308/77, loss: 2.1382591057772515e-06 2023-01-22 09:19:41.344328: step: 312/77, loss: 0.0003209102724213153 2023-01-22 09:19:42.671224: step: 
316/77, loss: 1.0722834304033313e-05 2023-01-22 09:19:44.042662: step: 320/77, loss: 0.04908447712659836 2023-01-22 09:19:45.362520: step: 324/77, loss: 0.016604645177721977 2023-01-22 09:19:46.648552: step: 328/77, loss: 0.01431315392255783 2023-01-22 09:19:48.054612: step: 332/77, loss: 3.631072104326449e-05 2023-01-22 09:19:49.368296: step: 336/77, loss: 5.555855022976175e-05 2023-01-22 09:19:50.647782: step: 340/77, loss: 0.004288515541702509 2023-01-22 09:19:51.975644: step: 344/77, loss: 9.268520079785958e-05 2023-01-22 09:19:53.296633: step: 348/77, loss: 3.883116733049974e-06 2023-01-22 09:19:54.647496: step: 352/77, loss: 1.0829372513398994e-05 2023-01-22 09:19:55.995766: step: 356/77, loss: 2.267902118546772e-06 2023-01-22 09:19:57.337320: step: 360/77, loss: 2.5786117475945503e-05 2023-01-22 09:19:58.632798: step: 364/77, loss: 4.100824662600644e-05 2023-01-22 09:19:59.942366: step: 368/77, loss: 2.7342935936758295e-05 2023-01-22 09:20:01.254587: step: 372/77, loss: 0.014448979869484901 2023-01-22 09:20:02.654145: step: 376/77, loss: 0.0063176341354846954 2023-01-22 09:20:03.990194: step: 380/77, loss: 0.2428482174873352 2023-01-22 09:20:05.338781: step: 384/77, loss: 1.3098239833198022e-05 2023-01-22 09:20:06.655894: step: 388/77, loss: 0.0023457477800548077 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Chinese: {'template': {'p': 0.9571428571428572, 'r': 0.5234375, 'f1': 0.6767676767676768}, 'slot': {'p': 0.5151515151515151, 'r': 0.015567765567765568, 'f1': 0.030222222222222227}, 'combined': 0.02045342312008979, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Korean: {'template': {'p': 0.9852941176470589, 'r': 0.5234375, 'f1': 0.6836734693877551}, 'slot': {'p': 0.5862068965517241, 'r': 0.015567765567765568, 'f1': 0.030330062444246204}, 'combined': 0.020735859018005058, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Russian: {'template': {'p': 0.9705882352941176, 'r': 0.515625, 'f1': 0.673469387755102}, 'slot': {'p': 0.5862068965517241, 'r': 0.015567765567765568, 'f1': 0.030330062444246204}, 'combined': 0.020426368584900503, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': 
{'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 26 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:21:53.224244: step: 4/77, loss: 0.0005963248549960554 2023-01-22 09:21:54.563459: step: 8/77, loss: 9.357670478493674e-07 2023-01-22 09:21:55.883111: step: 12/77, loss: 0.0045157684944570065 2023-01-22 09:21:57.169099: step: 16/77, loss: 4.600652573572006e-06 2023-01-22 09:21:58.461263: step: 20/77, loss: 7.227034188872494e-07 2023-01-22 09:21:59.742541: step: 24/77, loss: 1.8262548110214993e-05 2023-01-22 09:22:01.052878: step: 28/77, loss: 3.110050965915434e-05 2023-01-22 09:22:02.377068: step: 32/77, loss: 0.00761661259457469 2023-01-22 09:22:03.715230: step: 36/77, loss: 0.0003865267208311707 2023-01-22 09:22:05.020900: step: 40/77, loss: 7.569610170321539e-05 2023-01-22 09:22:06.321063: step: 44/77, loss: 6.36163167655468e-06 2023-01-22 09:22:07.627077: step: 48/77, loss: 1.2602094102476258e-05 2023-01-22 09:22:08.930259: step: 52/77, loss: 3.6071519389224704e-06 2023-01-22 09:22:10.267679: step: 56/77, loss: 6.752765784767689e-06 2023-01-22 09:22:11.553949: step: 60/77, loss: 0.0022517337929457426 2023-01-22 09:22:12.871231: step: 64/77, loss: 1.1220433862035861e-06 2023-01-22 09:22:14.188642: step: 68/77, loss: 0.00022226989676710218 2023-01-22 09:22:15.507609: step: 72/77, loss: 2.300361666129902e-05 2023-01-22 09:22:16.803819: step: 76/77, loss: 0.0028344355523586273 2023-01-22 09:22:18.066815: step: 80/77, loss: 5.2059611334698275e-05 2023-01-22 09:22:19.371945: step: 84/77, loss: 2.0697179934359156e-06 2023-01-22 09:22:20.703688: step: 88/77, loss: 3.142713103443384e-05 2023-01-22 09:22:21.994487: step: 92/77, loss: 3.7010131563874893e-06 2023-01-22 09:22:23.325653: step: 96/77, loss: 3.6026956422574585e-06 2023-01-22 09:22:24.645781: step: 
100/77, loss: 0.007741418667137623 2023-01-22 09:22:25.959544: step: 104/77, loss: 0.00021697793272323906 2023-01-22 09:22:27.301429: step: 108/77, loss: 0.0007774491677992046 2023-01-22 09:22:28.609262: step: 112/77, loss: 0.00023736809089314193 2023-01-22 09:22:29.894982: step: 116/77, loss: 7.488654773624148e-06 2023-01-22 09:22:31.230157: step: 120/77, loss: 0.00014339134213514626 2023-01-22 09:22:32.562063: step: 124/77, loss: 6.211158051883103e-06 2023-01-22 09:22:33.852642: step: 128/77, loss: 1.733893259370234e-05 2023-01-22 09:22:35.143725: step: 132/77, loss: 0.000151737971464172 2023-01-22 09:22:36.495428: step: 136/77, loss: 4.7963334509404376e-05 2023-01-22 09:22:37.787984: step: 140/77, loss: 0.005837517324835062 2023-01-22 09:22:39.072537: step: 144/77, loss: 0.016650885343551636 2023-01-22 09:22:40.388167: step: 148/77, loss: 0.042808350175619125 2023-01-22 09:22:41.700948: step: 152/77, loss: 2.871876858989708e-05 2023-01-22 09:22:43.044576: step: 156/77, loss: 9.238529514732363e-07 2023-01-22 09:22:44.387583: step: 160/77, loss: 9.037885320140049e-06 2023-01-22 09:22:45.718172: step: 164/77, loss: 0.0006664558313786983 2023-01-22 09:22:47.004438: step: 168/77, loss: 1.110103994506062e-06 2023-01-22 09:22:48.291428: step: 172/77, loss: 6.452086154240533e-07 2023-01-22 09:22:49.628584: step: 176/77, loss: 0.018400171771645546 2023-01-22 09:22:50.956538: step: 180/77, loss: 0.0945664495229721 2023-01-22 09:22:52.267988: step: 184/77, loss: 2.980229574234272e-08 2023-01-22 09:22:53.591954: step: 188/77, loss: 0.0011638787109404802 2023-01-22 09:22:54.938246: step: 192/77, loss: 0.017570655792951584 2023-01-22 09:22:56.288313: step: 196/77, loss: 2.538481385272462e-05 2023-01-22 09:22:57.584329: step: 200/77, loss: 2.4982262402772903e-05 2023-01-22 09:22:58.880359: step: 204/77, loss: 0.0003960702451877296 2023-01-22 09:23:00.223720: step: 208/77, loss: 6.644204859185265e-06 2023-01-22 09:23:01.542803: step: 212/77, loss: 0.00917132943868637 2023-01-22 09:23:02.825736: step: 216/77, loss: 0.018628239631652832 2023-01-22 09:23:04.134214: step: 220/77, loss: 0.016373340040445328 2023-01-22 09:23:05.410668: step: 224/77, loss: 8.663587323098909e-06 2023-01-22 09:23:06.735965: step: 228/77, loss: 0.00015038135461509228 2023-01-22 09:23:08.059255: step: 232/77, loss: 1.5028490452095866e-05 2023-01-22 09:23:09.408191: step: 236/77, loss: 6.243532197913737e-07 2023-01-22 09:23:10.722120: step: 240/77, loss: 1.8378557797404937e-05 2023-01-22 09:23:12.033428: step: 244/77, loss: 0.0005751802236773074 2023-01-22 09:23:13.388721: step: 248/77, loss: 8.018794324016199e-05 2023-01-22 09:23:14.653291: step: 252/77, loss: 5.468697850119497e-07 2023-01-22 09:23:15.966705: step: 256/77, loss: 3.7664656247216044e-06 2023-01-22 09:23:17.270596: step: 260/77, loss: 5.274927752907388e-05 2023-01-22 09:23:18.610511: step: 264/77, loss: 0.01224945392459631 2023-01-22 09:23:19.886689: step: 268/77, loss: 0.0007441304042004049 2023-01-22 09:23:21.185793: step: 272/77, loss: 8.493542509313556e-07 2023-01-22 09:23:22.492270: step: 276/77, loss: 1.2965811038156971e-05 2023-01-22 09:23:23.804559: step: 280/77, loss: 0.004589627962559462 2023-01-22 09:23:25.147517: step: 284/77, loss: 0.050548847764730453 2023-01-22 09:23:26.463124: step: 288/77, loss: 0.0001131748576881364 2023-01-22 09:23:27.810008: step: 292/77, loss: 0.0035917344503104687 2023-01-22 09:23:29.110617: step: 296/77, loss: 0.0008585210307501256 2023-01-22 09:23:30.454391: step: 300/77, loss: 0.009906351566314697 2023-01-22 09:23:31.794749: 
step: 304/77, loss: 0.0002823400718625635 2023-01-22 09:23:33.101831: step: 308/77, loss: 0.01213001273572445 2023-01-22 09:23:34.382533: step: 312/77, loss: 5.42655334356823e-06 2023-01-22 09:23:35.677529: step: 316/77, loss: 0.006417948752641678 2023-01-22 09:23:36.959503: step: 320/77, loss: 0.0020375254098325968 2023-01-22 09:23:38.255226: step: 324/77, loss: 0.014030457474291325 2023-01-22 09:23:39.570879: step: 328/77, loss: 1.0271183782606386e-05 2023-01-22 09:23:40.933599: step: 332/77, loss: 6.595694139832631e-05 2023-01-22 09:23:42.295342: step: 336/77, loss: 1.1324874549245578e-07 2023-01-22 09:23:43.639195: step: 340/77, loss: 9.327890779786685e-07 2023-01-22 09:23:44.962771: step: 344/77, loss: 1.0981971172441263e-06 2023-01-22 09:23:46.270652: step: 348/77, loss: 0.0008091052295640111 2023-01-22 09:23:47.609830: step: 352/77, loss: 0.028471339493989944 2023-01-22 09:23:48.970374: step: 356/77, loss: 0.0009936308488249779 2023-01-22 09:23:50.304480: step: 360/77, loss: 0.010523582808673382 2023-01-22 09:23:51.618527: step: 364/77, loss: 1.226344465976581e-06 2023-01-22 09:23:52.913552: step: 368/77, loss: 2.78790389529604e-06 2023-01-22 09:23:54.222475: step: 372/77, loss: 5.760260137321893e-06 2023-01-22 09:23:55.546050: step: 376/77, loss: 2.6933328626910225e-05 2023-01-22 09:23:56.821124: step: 380/77, loss: 0.014928015880286694 2023-01-22 09:23:58.181817: step: 384/77, loss: 5.522427454707213e-05 2023-01-22 09:23:59.563077: step: 388/77, loss: 0.0033049150370061398 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 26} Test Chinese: {'template': {'p': 0.9411764705882353, 'r': 0.5, 'f1': 0.6530612244897959}, 'slot': {'p': 0.4473684210526316, 'r': 0.015567765567765568, 'f1': 0.030088495575221242}, 'combined': 0.01964962976340979, 'epoch': 26} Dev Korean: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 26} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.5, 'f1': 0.649746192893401}, 'slot': {'p': 0.4358974358974359, 'r': 0.015567765567765568, 'f1': 0.030061892130857647}, 'combined': 0.019532599963196846, 'epoch': 26} Dev Russian: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 26} Test Russian: {'template': {'p': 0.9411764705882353, 'r': 0.5, 'f1': 0.6530612244897959}, 'slot': {'p': 0.4473684210526316, 'r': 0.015567765567765568, 'f1': 0.030088495575221242}, 'combined': 0.01964962976340979, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 26} ================================================== Current best result: -------------------- 
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 27 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:25:46.458596: step: 4/77, loss: 0.0020336946472525597 2023-01-22 09:25:47.725186: step: 8/77, loss: 0.018356280401349068 2023-01-22 09:25:49.059391: step: 12/77, loss: 0.06053902581334114 2023-01-22 09:25:50.395576: step: 16/77, loss: 6.077674242987996e-06 2023-01-22 09:25:51.733084: step: 20/77, loss: 0.031503964215517044 2023-01-22 09:25:53.077883: step: 24/77, loss: 0.0019097666954621673 2023-01-22 09:25:54.340022: step: 28/77, loss: 4.221747803967446e-05 2023-01-22 09:25:55.681890: step: 32/77, loss: 0.0009349957690574229 2023-01-22 09:25:56.992840: step: 36/77, loss: 7.898625881352928e-06 2023-01-22 09:25:58.233240: step: 40/77, loss: 7.117487257346511e-05 2023-01-22 09:25:59.523750: step: 44/77, loss: 7.294975512195379e-05 2023-01-22 09:26:00.863778: step: 48/77, loss: 0.04563151299953461 2023-01-22 09:26:02.191576: step: 52/77, loss: 2.12905270018382e-05 2023-01-22 09:26:03.477589: step: 56/77, loss: 1.4865798220853321e-05 2023-01-22 09:26:04.804696: step: 60/77, loss: 3.891951109835645e-06 2023-01-22 09:26:06.157202: step: 64/77, loss: 3.099417540397553e-07 2023-01-22 09:26:07.438026: step: 68/77, loss: 2.789420250337571e-06 2023-01-22 09:26:08.744854: step: 72/77, loss: 0.0001048161429935135 2023-01-22 09:26:10.083898: step: 76/77, loss: 3.1000083254184574e-05 2023-01-22 09:26:11.436051: step: 80/77, loss: 0.010710970498621464 2023-01-22 09:26:12.806208: step: 84/77, loss: 7.88254112649156e-07 2023-01-22 
09:26:14.174657: step: 88/77, loss: 0.0006356869707815349 2023-01-22 09:26:15.542071: step: 92/77, loss: 0.0023964433930814266 2023-01-22 09:26:16.773700: step: 96/77, loss: 0.018310893326997757 2023-01-22 09:26:18.104314: step: 100/77, loss: 6.962125917198136e-05 2023-01-22 09:26:19.417201: step: 104/77, loss: 0.0013482404174283147 2023-01-22 09:26:20.731003: step: 108/77, loss: 1.6118830899358727e-05 2023-01-22 09:26:22.046704: step: 112/77, loss: 0.004443638492375612 2023-01-22 09:26:23.349675: step: 116/77, loss: 6.946529902052134e-05 2023-01-22 09:26:24.641910: step: 120/77, loss: 5.4241332691162825e-05 2023-01-22 09:26:25.978248: step: 124/77, loss: 0.00019961423822678626 2023-01-22 09:26:27.263128: step: 128/77, loss: 5.965904620097717e-06 2023-01-22 09:26:28.543643: step: 132/77, loss: 7.897602216644373e-08 2023-01-22 09:26:29.864446: step: 136/77, loss: 1.139921550930012e-06 2023-01-22 09:26:31.173945: step: 140/77, loss: 0.0010861429618671536 2023-01-22 09:26:32.483491: step: 144/77, loss: 5.543186034628889e-07 2023-01-22 09:26:33.841426: step: 148/77, loss: 0.012882563285529613 2023-01-22 09:26:35.149713: step: 152/77, loss: 1.2950848031323403e-05 2023-01-22 09:26:36.480542: step: 156/77, loss: 1.0281780049581357e-07 2023-01-22 09:26:37.761276: step: 160/77, loss: 0.006291708908975124 2023-01-22 09:26:39.085373: step: 164/77, loss: 4.768329517901293e-07 2023-01-22 09:26:40.399276: step: 168/77, loss: 5.200457735554664e-07 2023-01-22 09:26:41.730426: step: 172/77, loss: 3.1454194413527148e-06 2023-01-22 09:26:43.093542: step: 176/77, loss: 0.08373827487230301 2023-01-22 09:26:44.400231: step: 180/77, loss: 2.7380117899156176e-05 2023-01-22 09:26:45.689058: step: 184/77, loss: 0.00020311328989919275 2023-01-22 09:26:46.976617: step: 188/77, loss: 8.150796020345297e-07 2023-01-22 09:26:48.331459: step: 192/77, loss: 1.0640987966326065e-05 2023-01-22 09:26:49.655266: step: 196/77, loss: 4.972180704498896e-06 2023-01-22 09:26:51.029205: step: 200/77, loss: 7.004695362411439e-06 2023-01-22 09:26:52.378140: step: 204/77, loss: 4.842814860239741e-07 2023-01-22 09:26:53.702356: step: 208/77, loss: 6.608444527955726e-05 2023-01-22 09:26:54.980088: step: 212/77, loss: 0.0008418294601142406 2023-01-22 09:26:56.350602: step: 216/77, loss: 0.00019111075380351394 2023-01-22 09:26:57.668115: step: 220/77, loss: 0.0033331357408314943 2023-01-22 09:26:58.966952: step: 224/77, loss: 5.2410439820960164e-05 2023-01-22 09:27:00.291176: step: 228/77, loss: 4.4703465817974575e-08 2023-01-22 09:27:01.626599: step: 232/77, loss: 2.5035567887243815e-05 2023-01-22 09:27:02.951633: step: 236/77, loss: 0.0004050828283652663 2023-01-22 09:27:04.288813: step: 240/77, loss: 5.2385298658919055e-06 2023-01-22 09:27:05.622457: step: 244/77, loss: 3.755078807898826e-07 2023-01-22 09:27:06.898417: step: 248/77, loss: 0.002330003073439002 2023-01-22 09:27:08.204798: step: 252/77, loss: 1.5153957519942196e-06 2023-01-22 09:27:09.583664: step: 256/77, loss: 0.0001904086529975757 2023-01-22 09:27:10.938594: step: 260/77, loss: 0.00027098722057417035 2023-01-22 09:27:12.298045: step: 264/77, loss: 0.00016721387510187924 2023-01-22 09:27:13.628605: step: 268/77, loss: 9.611950372345746e-05 2023-01-22 09:27:14.967002: step: 272/77, loss: 4.1280334698967636e-05 2023-01-22 09:27:16.315929: step: 276/77, loss: 0.060717128217220306 2023-01-22 09:27:17.658032: step: 280/77, loss: 3.148373934891424e-06 2023-01-22 09:27:19.013201: step: 284/77, loss: 0.02487768419086933 2023-01-22 09:27:20.357796: step: 288/77, loss: 
0.0035835355520248413 2023-01-22 09:27:21.670470: step: 292/77, loss: 6.977240991545841e-05 2023-01-22 09:27:22.949472: step: 296/77, loss: 0.01346883550286293 2023-01-22 09:27:24.292076: step: 300/77, loss: 5.304777914716396e-07 2023-01-22 09:27:25.619458: step: 304/77, loss: 0.0012750386958941817 2023-01-22 09:27:26.924425: step: 308/77, loss: 8.595256076660007e-05 2023-01-22 09:27:28.278289: step: 312/77, loss: 9.856934775598347e-05 2023-01-22 09:27:29.563992: step: 316/77, loss: 5.306802449922543e-06 2023-01-22 09:27:30.907937: step: 320/77, loss: 1.4739594007551204e-05 2023-01-22 09:27:32.180877: step: 324/77, loss: 1.0907513114943868e-06 2023-01-22 09:27:33.520786: step: 328/77, loss: 0.008079759776592255 2023-01-22 09:27:34.801207: step: 332/77, loss: 0.0067827519960701466 2023-01-22 09:27:36.129655: step: 336/77, loss: 6.75462870276533e-05 2023-01-22 09:27:37.454824: step: 340/77, loss: 4.3611848923319485e-06 2023-01-22 09:27:38.770560: step: 344/77, loss: 1.0728822985583975e-07 2023-01-22 09:27:40.090586: step: 348/77, loss: 1.3977514754515141e-05 2023-01-22 09:27:41.451145: step: 352/77, loss: 1.1816214282589499e-06 2023-01-22 09:27:42.770363: step: 356/77, loss: 0.00027072866214439273 2023-01-22 09:27:44.115256: step: 360/77, loss: 2.339343836865737e-06 2023-01-22 09:27:45.446775: step: 364/77, loss: 5.420063644123729e-06 2023-01-22 09:27:46.766822: step: 368/77, loss: 1.1458506605777075e-06 2023-01-22 09:27:48.044179: step: 372/77, loss: 0.0017846859991550446 2023-01-22 09:27:49.325554: step: 376/77, loss: 7.981000635481905e-06 2023-01-22 09:27:50.643592: step: 380/77, loss: 6.450262389989803e-06 2023-01-22 09:27:51.936725: step: 384/77, loss: 5.114632585900836e-06 2023-01-22 09:27:53.236543: step: 388/77, loss: 0.011223318055272102 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27} Test Chinese: {'template': {'p': 0.9324324324324325, 'r': 0.5390625, 'f1': 0.6831683168316832}, 'slot': {'p': 0.5625, 'r': 0.016483516483516484, 'f1': 0.032028469750889674}, 'combined': 0.02188083577040978, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27} Test Korean: {'template': {'p': 0.92, 'r': 0.5390625, 'f1': 0.6798029556650247}, 'slot': {'p': 0.5142857142857142, 'r': 0.016483516483516484, 'f1': 0.03194321206743567}, 'combined': 0.02171508997687745, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27} Test Russian: {'template': {'p': 0.9324324324324325, 'r': 0.5390625, 'f1': 0.6831683168316832}, 'slot': {'p': 0.5625, 'r': 0.016483516483516484, 'f1': 0.032028469750889674}, 'combined': 0.02188083577040978, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': 
{'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 28 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:29:40.621566: step: 4/77, loss: 1.6569199488003505e-06 2023-01-22 09:29:41.955960: step: 8/77, loss: 0.00440253596752882 2023-01-22 09:29:43.254459: step: 12/77, loss: 0.030564934015274048 2023-01-22 09:29:44.535719: step: 16/77, loss: 0.04476356878876686 2023-01-22 09:29:45.847346: step: 20/77, loss: 1.5125128811632749e-05 2023-01-22 09:29:47.156571: step: 24/77, loss: 0.0059463209472596645 2023-01-22 09:29:48.446374: step: 28/77, loss: 0.0001350680395262316 2023-01-22 09:29:49.751082: step: 32/77, loss: 5.26008307133452e-07 2023-01-22 09:29:51.156597: step: 36/77, loss: 1.583964490237122e-06 2023-01-22 09:29:52.435915: step: 40/77, loss: 0.001235371339134872 2023-01-22 09:29:53.735878: step: 44/77, loss: 4.629687828128226e-05 2023-01-22 09:29:55.052796: step: 48/77, loss: 9.21465743886074e-06 2023-01-22 09:29:56.337666: step: 52/77, loss: 6.062446118448861e-06 2023-01-22 09:29:57.681783: step: 56/77, loss: 0.004732063040137291 2023-01-22 09:29:59.038368: step: 60/77, loss: 1.5944220876917825e-07 2023-01-22 09:30:00.352616: step: 64/77, loss: 0.0011835889890789986 2023-01-22 09:30:01.696298: step: 68/77, loss: 0.002815812826156616 2023-01-22 09:30:03.024970: step: 72/77, loss: 9.246059198630974e-05 2023-01-22 
09:30:04.286693: step: 76/77, loss: 0.006792837753891945 2023-01-22 09:30:05.615838: step: 80/77, loss: 4.330431329435669e-05 2023-01-22 09:30:06.963953: step: 84/77, loss: 0.17696473002433777 2023-01-22 09:30:08.293217: step: 88/77, loss: 0.0020602114964276552 2023-01-22 09:30:09.611433: step: 92/77, loss: 8.787129445408937e-06 2023-01-22 09:30:10.912103: step: 96/77, loss: 0.13107673823833466 2023-01-22 09:30:12.217469: step: 100/77, loss: 0.0006606405950151384 2023-01-22 09:30:13.531319: step: 104/77, loss: 0.0017154912929981947 2023-01-22 09:30:14.887506: step: 108/77, loss: 1.216352939081844e-05 2023-01-22 09:30:16.260611: step: 112/77, loss: 0.0022697513923048973 2023-01-22 09:30:17.579701: step: 116/77, loss: 3.87430070247774e-08 2023-01-22 09:30:18.879166: step: 120/77, loss: 0.002314480487257242 2023-01-22 09:30:20.198644: step: 124/77, loss: 9.292403774452396e-06 2023-01-22 09:30:21.463029: step: 128/77, loss: 5.170510848984122e-05 2023-01-22 09:30:22.759412: step: 132/77, loss: 7.686324534006417e-05 2023-01-22 09:30:24.045231: step: 136/77, loss: 7.330068456212757e-06 2023-01-22 09:30:25.373991: step: 140/77, loss: 3.695250143209705e-06 2023-01-22 09:30:26.648817: step: 144/77, loss: 0.0001743622706271708 2023-01-22 09:30:28.011517: step: 148/77, loss: 1.0203694728261326e-05 2023-01-22 09:30:29.357909: step: 152/77, loss: 0.009616820141673088 2023-01-22 09:30:30.676401: step: 156/77, loss: 0.0005464103305712342 2023-01-22 09:30:31.995819: step: 160/77, loss: 1.5138200978981331e-05 2023-01-22 09:30:33.284807: step: 164/77, loss: 1.4187040505930781e-05 2023-01-22 09:30:34.607608: step: 168/77, loss: 0.0017845199909061193 2023-01-22 09:30:35.901878: step: 172/77, loss: 0.0008456232608295977 2023-01-22 09:30:37.185110: step: 176/77, loss: 4.213723514112644e-06 2023-01-22 09:30:38.516236: step: 180/77, loss: 2.3990813247110054e-07 2023-01-22 09:30:39.869263: step: 184/77, loss: 0.009962303563952446 2023-01-22 09:30:41.245856: step: 188/77, loss: 0.001620661118067801 2023-01-22 09:30:42.577710: step: 192/77, loss: 0.0022963618393987417 2023-01-22 09:30:43.878655: step: 196/77, loss: 0.008852764964103699 2023-01-22 09:30:45.185692: step: 200/77, loss: 1.7881298219890596e-07 2023-01-22 09:30:46.476102: step: 204/77, loss: 0.0029082591645419598 2023-01-22 09:30:47.806661: step: 208/77, loss: 0.0013253169599920511 2023-01-22 09:30:49.085503: step: 212/77, loss: 1.2865475582657382e-05 2023-01-22 09:30:50.412603: step: 216/77, loss: 7.049329269648297e-06 2023-01-22 09:30:51.724674: step: 220/77, loss: 0.004812009632587433 2023-01-22 09:30:53.038404: step: 224/77, loss: 1.2530237654573284e-05 2023-01-22 09:30:54.286720: step: 228/77, loss: 1.6643899698465248e-06 2023-01-22 09:30:55.566799: step: 232/77, loss: 0.06316334009170532 2023-01-22 09:30:56.862906: step: 236/77, loss: 0.0038331036921590567 2023-01-22 09:30:58.182660: step: 240/77, loss: 0.0005883581470698118 2023-01-22 09:30:59.551906: step: 244/77, loss: 1.542809695820324e-05 2023-01-22 09:31:00.868705: step: 248/77, loss: 1.5246181646944024e-05 2023-01-22 09:31:02.130590: step: 252/77, loss: 0.0017049933085218072 2023-01-22 09:31:03.430100: step: 256/77, loss: 0.033161669969558716 2023-01-22 09:31:04.786764: step: 260/77, loss: 0.00030278004123829305 2023-01-22 09:31:06.102102: step: 264/77, loss: 0.004430832806974649 2023-01-22 09:31:07.401816: step: 268/77, loss: 6.538610705320025e-06 2023-01-22 09:31:08.708401: step: 272/77, loss: 2.954918636532966e-05 2023-01-22 09:31:10.037938: step: 276/77, loss: 0.020510945469141006 
2023-01-22 09:31:11.391419: step: 280/77, loss: 0.00015496351988986135 2023-01-22 09:31:12.685983: step: 284/77, loss: 0.00019689204054884613 2023-01-22 09:31:13.993622: step: 288/77, loss: 0.02547045610845089 2023-01-22 09:31:15.305243: step: 292/77, loss: 1.9126133338431828e-05 2023-01-22 09:31:16.638364: step: 296/77, loss: 3.993159225501586e-06 2023-01-22 09:31:17.928313: step: 300/77, loss: 4.5150034111429704e-07 2023-01-22 09:31:19.204688: step: 304/77, loss: 0.09149489551782608 2023-01-22 09:31:20.528873: step: 308/77, loss: 7.498901140934322e-06 2023-01-22 09:31:21.876550: step: 312/77, loss: 0.05403384566307068 2023-01-22 09:31:23.175882: step: 316/77, loss: 1.805801184673328e-05 2023-01-22 09:31:24.474824: step: 320/77, loss: 0.04299530014395714 2023-01-22 09:31:25.764946: step: 324/77, loss: 1.7881388814089405e-08 2023-01-22 09:31:27.065868: step: 328/77, loss: 8.31848865345819e-06 2023-01-22 09:31:28.415916: step: 332/77, loss: 0.022570772096514702 2023-01-22 09:31:29.733265: step: 336/77, loss: 0.005088389851152897 2023-01-22 09:31:31.035392: step: 340/77, loss: 0.0036787528079003096 2023-01-22 09:31:32.372167: step: 344/77, loss: 3.063491931243334e-06 2023-01-22 09:31:33.663555: step: 348/77, loss: 0.0003421948349568993 2023-01-22 09:31:35.010814: step: 352/77, loss: 0.0001434506702935323 2023-01-22 09:31:36.365792: step: 356/77, loss: 7.787423965055496e-05 2023-01-22 09:31:37.635505: step: 360/77, loss: 2.6348821847932413e-05 2023-01-22 09:31:38.950949: step: 364/77, loss: 1.9668830191221787e-06 2023-01-22 09:31:40.242999: step: 368/77, loss: 2.5388269932591356e-05 2023-01-22 09:31:41.565596: step: 372/77, loss: 4.4703469370688254e-08 2023-01-22 09:31:42.897010: step: 376/77, loss: 0.03512633219361305 2023-01-22 09:31:44.250078: step: 380/77, loss: 0.012198736891150475 2023-01-22 09:31:45.541383: step: 384/77, loss: 0.003197713755071163 2023-01-22 09:31:46.923434: step: 388/77, loss: 0.0030318221542984247 ================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28} Test Chinese: {'template': {'p': 0.9850746268656716, 'r': 0.515625, 'f1': 0.676923076923077}, 'slot': {'p': 0.5161290322580645, 'r': 0.014652014652014652, 'f1': 0.028495102404274268}, 'combined': 0.019288992396739507, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28} Test Korean: {'template': {'p': 0.9846153846153847, 'r': 0.5, 'f1': 0.6632124352331606}, 'slot': {'p': 0.5161290322580645, 'r': 0.014652014652014652, 'f1': 0.028495102404274268}, 'combined': 0.018898306257757028, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28} Test Russian: {'template': {'p': 0.9705882352941176, 'r': 0.515625, 'f1': 0.673469387755102}, 'slot': {'p': 0.5, 'r': 0.013736263736263736, 'f1': 0.026737967914438505}, 'combined': 0.018007202881152463, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 28} Sample Korean: 
{'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 29 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:33:33.637140: step: 4/77, loss: 9.575464900990482e-06 2023-01-22 09:33:35.009784: step: 8/77, loss: 4.4703465817974575e-08 2023-01-22 09:33:36.375092: step: 12/77, loss: 2.4912869776017033e-06 2023-01-22 09:33:37.671006: step: 16/77, loss: 0.00020589017367456108 2023-01-22 09:33:39.015367: step: 20/77, loss: 0.010029882192611694 2023-01-22 09:33:40.305567: step: 24/77, loss: 0.004283885005861521 2023-01-22 09:33:41.565107: step: 28/77, loss: 2.518277426588611e-07 2023-01-22 09:33:42.918290: step: 32/77, loss: 1.4901119982368982e-07 2023-01-22 09:33:44.247831: step: 36/77, loss: 0.00012935479753650725 2023-01-22 09:33:45.558025: step: 40/77, loss: 4.192922915535746e-06 2023-01-22 09:33:46.915681: step: 44/77, loss: 0.008619138039648533 2023-01-22 09:33:48.251899: step: 48/77, loss: 9.372622571390821e-07 2023-01-22 09:33:49.575457: step: 52/77, loss: 2.7341654913470848e-06 2023-01-22 09:33:50.922387: step: 56/77, loss: 1.3634332844958408e-06 2023-01-22 09:33:52.249933: step: 60/77, loss: 1.0352901881560683e-05 2023-01-22 09:33:53.545183: step: 64/77, loss: 5.2205054089426994e-05 
2023-01-22 09:33:54.841994: step: 68/77, loss: 7.682454452151433e-05 2023-01-22 09:33:56.179959: step: 72/77, loss: 0.0089035015553236 2023-01-22 09:33:57.460319: step: 76/77, loss: 4.730820364784449e-06 2023-01-22 09:33:58.749780: step: 80/77, loss: 0.00038829841651022434 2023-01-22 09:34:00.042122: step: 84/77, loss: 0.007953963242471218 2023-01-22 09:34:01.362023: step: 88/77, loss: 0.009263883344829082 2023-01-22 09:34:02.695263: step: 92/77, loss: 0.0070535242557525635 2023-01-22 09:34:04.021503: step: 96/77, loss: 4.276575782569125e-05 2023-01-22 09:34:05.353107: step: 100/77, loss: 9.437465632800013e-05 2023-01-22 09:34:06.667753: step: 104/77, loss: 1.0943134839180857e-05 2023-01-22 09:34:08.015214: step: 108/77, loss: 0.046666789799928665 2023-01-22 09:34:09.359804: step: 112/77, loss: 6.960002065170556e-05 2023-01-22 09:34:10.720101: step: 116/77, loss: 1.4876502973493189e-05 2023-01-22 09:34:12.042556: step: 120/77, loss: 2.0557276002364233e-05 2023-01-22 09:34:13.366526: step: 124/77, loss: 2.9650689157278975e-06 2023-01-22 09:34:14.741383: step: 128/77, loss: 0.01160411350429058 2023-01-22 09:34:16.072884: step: 132/77, loss: 1.8775409671434318e-07 2023-01-22 09:34:17.341066: step: 136/77, loss: 0.002117191907018423 2023-01-22 09:34:18.643479: step: 140/77, loss: 8.289856850751676e-06 2023-01-22 09:34:19.946185: step: 144/77, loss: 3.176602149324026e-06 2023-01-22 09:34:21.247220: step: 148/77, loss: 1.4546119928127155e-05 2023-01-22 09:34:22.574629: step: 152/77, loss: 6.943806170056632e-07 2023-01-22 09:34:23.886283: step: 156/77, loss: 1.3262007314551738e-07 2023-01-22 09:34:25.209882: step: 160/77, loss: 4.4194708607392386e-05 2023-01-22 09:34:26.519882: step: 164/77, loss: 0.0023931171745061874 2023-01-22 09:34:27.869717: step: 168/77, loss: 1.624219407858618e-07 2023-01-22 09:34:29.236126: step: 172/77, loss: 3.7634523323504254e-05 2023-01-22 09:34:30.560676: step: 176/77, loss: 6.586186600543442e-07 2023-01-22 09:34:31.901862: step: 180/77, loss: 2.575074540800415e-05 2023-01-22 09:34:33.184118: step: 184/77, loss: 2.8311990263318876e-07 2023-01-22 09:34:34.460282: step: 188/77, loss: 8.425443957094103e-05 2023-01-22 09:34:35.778447: step: 192/77, loss: 6.786584708606824e-05 2023-01-22 09:34:37.095585: step: 196/77, loss: 0.0005444654962047935 2023-01-22 09:34:38.423028: step: 200/77, loss: 1.1212527169845998e-05 2023-01-22 09:34:39.737885: step: 204/77, loss: 0.06021525710821152 2023-01-22 09:34:41.051868: step: 208/77, loss: 0.0005923082935623825 2023-01-22 09:34:42.366228: step: 212/77, loss: 3.2341471523977816e-05 2023-01-22 09:34:43.681105: step: 216/77, loss: 0.023997997865080833 2023-01-22 09:34:45.011787: step: 220/77, loss: 2.1134215785423294e-05 2023-01-22 09:34:46.318512: step: 224/77, loss: 1.3411697182164062e-05 2023-01-22 09:34:47.635575: step: 228/77, loss: 0.003213142976164818 2023-01-22 09:34:48.966030: step: 232/77, loss: 8.688562957104295e-05 2023-01-22 09:34:50.297484: step: 236/77, loss: 0.038733284920454025 2023-01-22 09:34:51.635789: step: 240/77, loss: 0.0021264494862407446 2023-01-22 09:34:53.001782: step: 244/77, loss: 0.0007159699453040957 2023-01-22 09:34:54.265723: step: 248/77, loss: 0.02838277630507946 2023-01-22 09:34:55.628655: step: 252/77, loss: 4.948479909216985e-05 2023-01-22 09:34:56.942896: step: 256/77, loss: 0.004647047724574804 2023-01-22 09:34:58.312608: step: 260/77, loss: 0.0006775567308068275 2023-01-22 09:34:59.644466: step: 264/77, loss: 5.960457727383073e-08 2023-01-22 09:35:00.960011: step: 268/77, loss: 
0.0006495536654256284 2023-01-22 09:35:02.271563: step: 272/77, loss: 0.00016570983279962093 2023-01-22 09:35:03.595083: step: 276/77, loss: 0.00035573996137827635 2023-01-22 09:35:04.868155: step: 280/77, loss: 3.2136176741914824e-05 2023-01-22 09:35:06.172620: step: 284/77, loss: 3.5567613849707413e-06 2023-01-22 09:35:07.477719: step: 288/77, loss: 3.010017337601312e-07 2023-01-22 09:35:08.801053: step: 292/77, loss: 0.008235101588070393 2023-01-22 09:35:10.120113: step: 296/77, loss: 0.0002233803243143484 2023-01-22 09:35:11.427227: step: 300/77, loss: 0.08005578815937042 2023-01-22 09:35:12.772441: step: 304/77, loss: 0.004035983234643936 2023-01-22 09:35:14.056565: step: 308/77, loss: 9.74525846686447e-07 2023-01-22 09:35:15.398185: step: 312/77, loss: 3.2695716072339565e-05 2023-01-22 09:35:16.729587: step: 316/77, loss: 7.405756718981138e-07 2023-01-22 09:35:18.030175: step: 320/77, loss: 3.3630524285399588e-06 2023-01-22 09:35:19.336794: step: 324/77, loss: 3.0411854368139757e-06 2023-01-22 09:35:20.729165: step: 328/77, loss: 2.631331653901725e-06 2023-01-22 09:35:22.052804: step: 332/77, loss: 0.0001946384581970051 2023-01-22 09:35:23.374102: step: 336/77, loss: 0.005614703521132469 2023-01-22 09:35:24.682571: step: 340/77, loss: 2.889034249164979e-06 2023-01-22 09:35:26.025755: step: 344/77, loss: 0.00015798686945345253 2023-01-22 09:35:27.315239: step: 348/77, loss: 0.004673933610320091 2023-01-22 09:35:28.653673: step: 352/77, loss: 8.429842637269758e-06 2023-01-22 09:35:29.955187: step: 356/77, loss: 0.00011223576439078897 2023-01-22 09:35:31.237736: step: 360/77, loss: 5.240143582341261e-05 2023-01-22 09:35:32.590188: step: 364/77, loss: 2.807161035889294e-05 2023-01-22 09:35:33.929353: step: 368/77, loss: 0.003233521245419979 2023-01-22 09:35:35.269864: step: 372/77, loss: 0.08775091171264648 2023-01-22 09:35:36.586983: step: 376/77, loss: 0.0008770658751018345 2023-01-22 09:35:37.872850: step: 380/77, loss: 1.5991987311281264e-05 2023-01-22 09:35:39.161345: step: 384/77, loss: 0.013519562780857086 2023-01-22 09:35:40.476081: step: 388/77, loss: 5.1822891691699624e-05 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 29} Test Chinese: {'template': {'p': 0.9565217391304348, 'r': 0.515625, 'f1': 0.6700507614213198}, 'slot': {'p': 0.6206896551724138, 'r': 0.016483516483516484, 'f1': 0.032114183764495985}, 'combined': 0.021518133283824722, 'epoch': 29} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 29} Test Korean: {'template': {'p': 0.9428571428571428, 'r': 0.515625, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5625, 'r': 0.016483516483516484, 'f1': 0.032028469750889674}, 'combined': 0.02135231316725978, 'epoch': 29} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 29} Test Russian: {'template': {'p': 0.9565217391304348, 'r': 0.515625, 'f1': 0.6700507614213198}, 'slot': {'p': 0.6, 'r': 0.016483516483516484, 'f1': 0.0320855614973262}, 'combined': 0.021498954911914003, 'epoch': 29} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 
0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 29} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 29} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Chinese: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Korean: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9692307692307692, 'r': 0.4921875, 'f1': 0.6528497409326425}, 'slot': {'p': 0.55, 'r': 0.010073260073260074, 'f1': 0.01978417266187051}, 'combined': 0.01291609199686883, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4}
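
Note on the logged scores (a minimal sketch, not taken from train.py itself): the 'f1' fields above are consistent with the standard harmonic mean of the logged 'p' and 'r', and each 'combined' value matches the product of the template F1 and the slot F1 for that split. The helper names below (f1, combined_score) are illustrative assumptions, not identifiers from the training code; the numbers are copied from the epoch-25 "Test Chinese" entry above.

# Hedged sketch of how the 'combined' field appears to be derived from the log's own numbers.
def f1(p: float, r: float) -> float:
    # Standard F1: harmonic mean of precision and recall.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template: dict, slot: dict) -> float:
    # 'combined' seems to be template F1 multiplied by slot F1.
    return f1(template['p'], template['r']) * f1(slot['p'], slot['r'])

# Values taken from the epoch-25 "Test Chinese" entry in the log above.
template = {'p': 0.9571428571428572, 'r': 0.5234375}
slot = {'p': 0.5151515151515151, 'r': 0.015567765567765568}
print(combined_score(template, slot))  # ~0.020453, matching the logged 'combined': 0.02045342312008979

Under this reading, the plateauing 'combined' dev score (0.0518 from epoch 4 onward) is driven almost entirely by the very low slot recall, since the template F1 barely moves across epochs 23-29.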