Command that produces this log: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
----------------------------------------------------------------------------------------------------
> trainable params:
>>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024])
>>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024])
>>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024])
>>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024])
>>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.2.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.17.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.23.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.pooler.dense.weight: torch.Size([1024, 1024])
>>> xlmr.pooler.dense.bias: torch.Size([1024])
>>> trans_rep.weight: torch.Size([1024, 2048])
>>> trans_rep.bias: torch.Size([1024])
>>> hidden_ffns.Corruplate.layers.0.weight: torch.Size([768, 1024])
>>> hidden_ffns.Corruplate.layers.0.bias: torch.Size([768])
>>> hidden_ffns.Cybercrimeplate.layers.0.weight: torch.Size([768, 1024])
>>> hidden_ffns.Cybercrimeplate.layers.0.bias: torch.Size([768])
>>> hidden_ffns.Disasterplate.layers.0.weight: torch.Size([768, 1024])
>>> hidden_ffns.Disasterplate.layers.0.bias: torch.Size([768])
>>> hidden_ffns.Displacementplate.layers.0.weight: torch.Size([768, 1024])
>>> hidden_ffns.Displacementplate.layers.0.bias: torch.Size([768])
>>> hidden_ffns.Epidemiplate.layers.0.weight: torch.Size([768, 1024])
>>> hidden_ffns.Epidemiplate.layers.0.bias: torch.Size([768])
>>> hidden_ffns.Etiplate.layers.0.weight: torch.Size([768, 1024])
>>> hidden_ffns.Etiplate.layers.0.bias: torch.Size([768])
>>> hidden_ffns.Protestplate.layers.0.weight: torch.Size([768, 1024])
>>> hidden_ffns.Protestplate.layers.0.bias: torch.Size([768])
>>> hidden_ffns.Terrorplate.layers.0.weight: torch.Size([768, 1024])
>>> hidden_ffns.Terrorplate.layers.0.bias: torch.Size([768])
>>> template_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768])
>>> template_classifiers.Corruplate.layers.0.bias: torch.Size([450])
>>> template_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450])
>>> template_classifiers.Corruplate.layers.1.bias: torch.Size([2])
>>> template_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768])
>>> template_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450])
>>> template_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450])
>>> template_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2])
>>> template_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768])
>>> template_classifiers.Disasterplate.layers.0.bias: torch.Size([450])
>>> template_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450])
>>> template_classifiers.Disasterplate.layers.1.bias: torch.Size([2])
>>> template_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768])
>>> template_classifiers.Displacementplate.layers.0.bias: torch.Size([450])
>>> template_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450])
>>> template_classifiers.Displacementplate.layers.1.bias: torch.Size([2])
>>> template_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768])
>>> template_classifiers.Epidemiplate.layers.0.bias: torch.Size([450])
>>> template_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450])
>>> template_classifiers.Epidemiplate.layers.1.bias: torch.Size([2])
>>> template_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768])
>>> template_classifiers.Etiplate.layers.0.bias: torch.Size([450])
>>> template_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450])
>>> template_classifiers.Etiplate.layers.1.bias: torch.Size([2])
>>> template_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768])
>>> template_classifiers.Protestplate.layers.0.bias: torch.Size([450])
>>> template_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450])
>>> template_classifiers.Protestplate.layers.1.bias: torch.Size([2])
>>> template_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768])
>>> template_classifiers.Terrorplate.layers.0.bias: torch.Size([450])
>>> template_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450])
>>> template_classifiers.Terrorplate.layers.1.bias: torch.Size([2])
>>> type_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768])
>>> type_classifiers.Corruplate.layers.0.bias: torch.Size([450])
>>> type_classifiers.Corruplate.layers.1.weight: torch.Size([6, 450])
>>> type_classifiers.Corruplate.layers.1.bias: torch.Size([6])
>>> type_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768])
>>> type_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450])
>>> type_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([6, 450])
>>> type_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([6])
>>> type_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768])
>>> type_classifiers.Disasterplate.layers.0.bias: torch.Size([450])
>>> type_classifiers.Disasterplate.layers.1.weight: torch.Size([6, 450])
>>> type_classifiers.Disasterplate.layers.1.bias: torch.Size([6])
>>> type_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768])
>>> type_classifiers.Displacementplate.layers.0.bias: torch.Size([450])
>>> type_classifiers.Displacementplate.layers.1.weight: torch.Size([6, 450])
>>> type_classifiers.Displacementplate.layers.1.bias: torch.Size([6])
>>> type_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768])
>>> type_classifiers.Epidemiplate.layers.0.bias: torch.Size([450])
>>> type_classifiers.Epidemiplate.layers.1.weight: torch.Size([6, 450])
>>> type_classifiers.Epidemiplate.layers.1.bias: torch.Size([6])
>>> type_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768])
>>> type_classifiers.Etiplate.layers.0.bias: torch.Size([450])
>>> type_classifiers.Etiplate.layers.1.weight: torch.Size([6, 450])
>>> type_classifiers.Etiplate.layers.1.bias: torch.Size([6])
>>> type_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768])
>>> type_classifiers.Protestplate.layers.0.bias: torch.Size([450])
>>> type_classifiers.Protestplate.layers.1.weight: torch.Size([6, 450])
>>> type_classifiers.Protestplate.layers.1.bias: torch.Size([6])
>>> type_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768])
>>> type_classifiers.Terrorplate.layers.0.bias: torch.Size([450])
>>> type_classifiers.Terrorplate.layers.1.weight: torch.Size([6, 450])
>>> type_classifiers.Terrorplate.layers.1.bias: torch.Size([6])
>>> completion_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768])
>>> completion_classifiers.Corruplate.layers.0.bias: torch.Size([450])
>>> completion_classifiers.Corruplate.layers.1.weight: torch.Size([4, 450])
>>> completion_classifiers.Corruplate.layers.1.bias: torch.Size([4])
>>> completion_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768])
>>> completion_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450])
>>> completion_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([4, 450])
>>> completion_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([4])
>>> completion_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768])
>>> completion_classifiers.Disasterplate.layers.0.bias: torch.Size([450])
>>> completion_classifiers.Disasterplate.layers.1.weight: torch.Size([4, 450])
>>> completion_classifiers.Disasterplate.layers.1.bias: torch.Size([4])
>>> completion_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768])
>>> completion_classifiers.Displacementplate.layers.0.bias: torch.Size([450])
>>> completion_classifiers.Displacementplate.layers.1.weight: torch.Size([4, 450])
>>> completion_classifiers.Displacementplate.layers.1.bias: torch.Size([4])
>>> completion_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768])
>>> completion_classifiers.Epidemiplate.layers.0.bias: torch.Size([450])
>>> completion_classifiers.Epidemiplate.layers.1.weight: torch.Size([4, 450])
>>> completion_classifiers.Epidemiplate.layers.1.bias: torch.Size([4])
>>> completion_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768])
>>> completion_classifiers.Etiplate.layers.0.bias: torch.Size([450])
>>> completion_classifiers.Etiplate.layers.1.weight: torch.Size([4, 450])
>>> completion_classifiers.Etiplate.layers.1.bias: torch.Size([4])
>>> completion_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768])
>>> completion_classifiers.Protestplate.layers.0.bias: torch.Size([450])
>>> completion_classifiers.Protestplate.layers.1.weight: torch.Size([4, 450])
>>> completion_classifiers.Protestplate.layers.1.bias: torch.Size([4])
>>> completion_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768])
>>> completion_classifiers.Terrorplate.layers.0.bias: torch.Size([450])
>>> completion_classifiers.Terrorplate.layers.1.weight: torch.Size([4, 450])
>>> completion_classifiers.Terrorplate.layers.1.bias: torch.Size([4])
>>> overtime_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768])
>>> overtime_classifiers.Corruplate.layers.0.bias: torch.Size([450])
>>> overtime_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450])
>>> overtime_classifiers.Corruplate.layers.1.bias: torch.Size([2])
>>> overtime_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768])
>>> overtime_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450])
>>> overtime_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450])
>>> overtime_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2])
>>> overtime_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768])
>>> overtime_classifiers.Disasterplate.layers.0.bias: torch.Size([450])
>>> overtime_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450])
>>> overtime_classifiers.Disasterplate.layers.1.bias: torch.Size([2])
>>> overtime_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768])
>>> overtime_classifiers.Displacementplate.layers.0.bias: torch.Size([450])
>>> overtime_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450])
>>> overtime_classifiers.Displacementplate.layers.1.bias: torch.Size([2])
>>> overtime_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768])
>>> overtime_classifiers.Epidemiplate.layers.0.bias: torch.Size([450])
>>> overtime_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450])
>>> overtime_classifiers.Epidemiplate.layers.1.bias: torch.Size([2])
>>> overtime_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768])
>>> overtime_classifiers.Etiplate.layers.0.bias: torch.Size([450])
>>> overtime_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450])
>>> overtime_classifiers.Etiplate.layers.1.bias: torch.Size([2])
>>> overtime_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768])
>>> overtime_classifiers.Protestplate.layers.0.bias: torch.Size([450])
>>> overtime_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450])
>>> overtime_classifiers.Protestplate.layers.1.bias: torch.Size([2])
>>> overtime_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768])
>>> overtime_classifiers.Terrorplate.layers.0.bias: torch.Size([450])
>>> overtime_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450])
>>> overtime_classifiers.Terrorplate.layers.1.bias: torch.Size([2])
>>> coordinated_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768])
>>> coordinated_classifiers.Corruplate.layers.0.bias: torch.Size([450])
>>> coordinated_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450])
>>> coordinated_classifiers.Corruplate.layers.1.bias: torch.Size([2])
>>> coordinated_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768])
>>> coordinated_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450])
>>> coordinated_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450])
>>> coordinated_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2])
>>> coordinated_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768])
>>> coordinated_classifiers.Disasterplate.layers.0.bias: torch.Size([450])
>>> coordinated_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450])
>>> coordinated_classifiers.Disasterplate.layers.1.bias: torch.Size([2])
>>> coordinated_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768])
>>> coordinated_classifiers.Displacementplate.layers.0.bias: torch.Size([450])
>>> coordinated_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450])
>>> coordinated_classifiers.Displacementplate.layers.1.bias: torch.Size([2])
>>> coordinated_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768])
>>> coordinated_classifiers.Epidemiplate.layers.0.bias: torch.Size([450])
>>> coordinated_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450])
>>> coordinated_classifiers.Epidemiplate.layers.1.bias: torch.Size([2])
>>> coordinated_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768])
>>> coordinated_classifiers.Etiplate.layers.0.bias: torch.Size([450])
>>> coordinated_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450])
>>> coordinated_classifiers.Etiplate.layers.1.bias: torch.Size([2])
>>> coordinated_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768])
>>> coordinated_classifiers.Protestplate.layers.0.bias: torch.Size([450])
>>> coordinated_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450])
>>> coordinated_classifiers.Protestplate.layers.1.bias: torch.Size([2])
>>> coordinated_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768])
>>> coordinated_classifiers.Terrorplate.layers.0.bias: torch.Size([450])
>>> coordinated_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450])
>>> coordinated_classifiers.Terrorplate.layers.1.bias: torch.Size([2])
n_trainable_params: 582185936, n_nontrainable_params: 0
----------------------------------------------------------------------------------------------------
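The listing above is a standard walk over `named_parameters()`: the XLM-R large encoder (24 layers, hidden size 1024), a `trans_rep` projection, and per-template-type heads (a 1024-to-768 `hidden_ffn` feeding 450-unit classifiers, matching `--event_hidden_num 450`). A minimal sketch of how such a dump and the totals can be produced from any `torch.nn.Module` (not the repository's own code):

```python
import torch

def summarize_trainable_params(model: torch.nn.Module) -> None:
    # Walk named_parameters() and mirror the ">>> name: shape" lines above.
    n_trainable, n_nontrainable = 0, 0
    print('> trainable params:')
    for name, param in model.named_parameters():
        if param.requires_grad:
            print(f'>>> {name}: {param.shape}')
            n_trainable += param.numel()
        else:
            n_nontrainable += param.numel()
    print(f'n_trainable_params: {n_trainable}, '
          f'n_nontrainable_params: {n_nontrainable}')
```

The word-embedding matrix alone (250002 x 1024 = 256,002,048 entries) accounts for nearly half of the 582,185,936 trainable parameters.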
******************************
Epoch: 0
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-22 07:08:52.893130: step: 4/77, loss: 1.0617469549179077
2023-01-22 07:08:54.162231: step: 8/77, loss: 1.071832537651062
2023-01-22 07:08:55.404930: step: 12/77, loss: 1.0500397682189941
2023-01-22 07:08:56.714220: step: 16/77, loss: 1.0569696426391602
2023-01-22 07:08:58.043500: step: 20/77, loss: 1.036795973777771
2023-01-22 07:08:59.403772: step: 24/77, loss: 1.053377628326416
2023-01-22 07:09:00.722331: step: 28/77, loss: 1.0394463539123535
2023-01-22 07:09:02.040397: step: 32/77, loss: 1.0370593070983887
2023-01-22 07:09:03.341771: step: 36/77, loss: 1.0463190078735352
2023-01-22 07:09:04.643713: step: 40/77, loss: 1.0341830253601074
2023-01-22 07:09:05.909549: step: 44/77, loss: 1.01943039894104
2023-01-22 07:09:07.147464: step: 48/77, loss: 1.0161728858947754
2023-01-22 07:09:08.397231: step: 52/77, loss: 0.9935738444328308
2023-01-22 07:09:09.677950: step: 56/77, loss: 0.9947496652603149
2023-01-22 07:09:10.958494: step: 60/77, loss: 0.9947984218597412
2023-01-22 07:09:12.306315: step: 64/77, loss: 0.9740759134292603
2023-01-22 07:09:13.578317: step: 68/77, loss: 0.9578840732574463
2023-01-22 07:09:14.885296: step: 72/77, loss: 0.958991527557373
2023-01-22 07:09:16.190767: step: 76/77, loss: 0.9292556047439575
2023-01-22 07:09:17.533417: step: 80/77, loss: 0.9261636734008789
2023-01-22 07:09:18.815347: step: 84/77, loss: 0.9224743247032166
2023-01-22 07:09:20.142379: step: 88/77, loss: 0.9078954458236694
2023-01-22 07:09:21.493469: step: 92/77, loss: 0.8906626105308533
2023-01-22 07:09:22.779459: step: 96/77, loss: 0.8689224720001221
2023-01-22 07:09:24.090443: step: 100/77, loss: 0.8581384420394897
2023-01-22 07:09:25.414162: step: 104/77, loss: 0.8440836668014526
2023-01-22 07:09:26.738558: step: 108/77, loss: 0.8239055871963501
2023-01-22 07:09:28.028607: step: 112/77, loss: 0.7871434688568115
2023-01-22 07:09:29.310515: step: 116/77, loss: 0.7866687178611755
2023-01-22 07:09:30.616077: step: 120/77, loss: 0.8152998685836792
2023-01-22 07:09:31.920679: step: 124/77, loss: 0.7473165988922119
2023-01-22 07:09:33.274874: step: 128/77, loss: 0.7465535402297974
2023-01-22 07:09:34.553341: step: 132/77, loss: 0.7344512939453125
2023-01-22 07:09:35.866585: step: 136/77, loss: 0.6562771797180176
2023-01-22 07:09:37.188654: step: 140/77, loss: 0.7067196369171143
2023-01-22 07:09:38.532789: step: 144/77, loss: 0.6306025981903076
2023-01-22 07:09:39.835179: step: 148/77, loss: 0.6918684244155884
2023-01-22 07:09:41.215879: step: 152/77, loss: 0.5625179409980774
2023-01-22 07:09:42.562812: step: 156/77, loss: 0.5566755533218384
2023-01-22 07:09:43.880650: step: 160/77, loss: 0.5231382846832275
2023-01-22 07:09:45.237616: step: 164/77, loss: 0.5328485369682312
2023-01-22 07:09:46.497796: step: 168/77, loss: 0.5300477147102356
2023-01-22 07:09:47.794077: step: 172/77, loss: 0.4442155957221985
2023-01-22 07:09:49.082777: step: 176/77, loss: 0.4701414108276367
2023-01-22 07:09:50.368120: step: 180/77, loss: 0.45259642601013184
2023-01-22 07:09:51.639238: step: 184/77, loss: 0.3861841559410095
2023-01-22 07:09:52.917229: step: 188/77, loss: 0.4100555181503296
2023-01-22 07:09:54.220589: step: 192/77, loss: 0.3761431574821472
2023-01-22 07:09:55.548069: step: 196/77, loss: 0.48926883935928345
2023-01-22 07:09:56.879503: step: 200/77, loss: 0.3657160997390747
2023-01-22 07:09:58.214044: step: 204/77, loss: 0.314441055059433
2023-01-22 07:09:59.524866: step: 208/77, loss: 0.2680244743824005
2023-01-22 07:10:00.831989: step: 212/77, loss: 0.2731962502002716
2023-01-22 07:10:02.128947: step: 216/77, loss: 0.22471654415130615
2023-01-22 07:10:03.454176: step: 220/77, loss: 0.26468920707702637
2023-01-22 07:10:04.781479: step: 224/77, loss: 0.20208311080932617
2023-01-22 07:10:06.087821: step: 228/77, loss: 0.24795040488243103
2023-01-22 07:10:07.402426: step: 232/77, loss: 0.23075196146965027
2023-01-22 07:10:08.673249: step: 236/77, loss: 0.20327256619930267
2023-01-22 07:10:09.995248: step: 240/77, loss: 0.17273639142513275
2023-01-22 07:10:11.323577: step: 244/77, loss: 0.1513095498085022
2023-01-22 07:10:12.633779: step: 248/77, loss: 0.17677460610866547
2023-01-22 07:10:13.937355: step: 252/77, loss: 0.10056942701339722
2023-01-22 07:10:15.211168: step: 256/77, loss: 0.09389781951904297
2023-01-22 07:10:16.520970: step: 260/77, loss: 0.11536524444818497
2023-01-22 07:10:17.812066: step: 264/77, loss: 0.17818987369537354
2023-01-22 07:10:19.091578: step: 268/77, loss: 0.23469536006450653
2023-01-22 07:10:20.403383: step: 272/77, loss: 0.16398277878761292
2023-01-22 07:10:21.696174: step: 276/77, loss: 0.27092573046684265
2023-01-22 07:10:22.956012: step: 280/77, loss: 0.15249253809452057
2023-01-22 07:10:24.207220: step: 284/77, loss: 0.14844149351119995
2023-01-22 07:10:25.507519: step: 288/77, loss: 0.034965530037879944
2023-01-22 07:10:26.861519: step: 292/77, loss: 0.19286976754665375
2023-01-22 07:10:28.148278: step: 296/77, loss: 0.13884218037128448
2023-01-22 07:10:29.453245: step: 300/77, loss: 0.05468413233757019
2023-01-22 07:10:30.749059: step: 304/77, loss: 0.2615947425365448
2023-01-22 07:10:32.047828: step: 308/77, loss: 0.06353268027305603
2023-01-22 07:10:33.337525: step: 312/77, loss: 0.3093520402908325
2023-01-22 07:10:34.663847: step: 316/77, loss: 0.13308614492416382
2023-01-22 07:10:35.930327: step: 320/77, loss: 0.0702965259552002
2023-01-22 07:10:37.252929: step: 324/77, loss: 0.09301605820655823
2023-01-22 07:10:38.543066: step: 328/77, loss: 0.0733005702495575
2023-01-22 07:10:39.807030: step: 332/77, loss: 0.10775697976350784
2023-01-22 07:10:41.096357: step: 336/77, loss: 0.08684641122817993
2023-01-22 07:10:42.416227: step: 340/77, loss: 0.07254372537136078
2023-01-22 07:10:43.725912: step: 344/77, loss: 0.11763302981853485
2023-01-22 07:10:44.988089: step: 348/77, loss: 0.16487447917461395
2023-01-22 07:10:46.264963: step: 352/77, loss: 0.08935951441526413
2023-01-22 07:10:47.549792: step: 356/77, loss: 0.08099295198917389
2023-01-22 07:10:48.836034: step: 360/77, loss: 0.07289375364780426
2023-01-22 07:10:50.103969: step: 364/77, loss: 0.07927817851305008
2023-01-22 07:10:51.417887: step: 368/77, loss: 0.06166777387261391
2023-01-22 07:10:52.706395: step: 372/77, loss: 0.1259939819574356
2023-01-22 07:10:54.006446: step: 376/77, loss: 0.36279356479644775
2023-01-22 07:10:55.266712: step: 380/77, loss: 0.035687074065208435
2023-01-22 07:10:56.577458: step: 384/77, loss: 0.11559747159481049
2023-01-22 07:10:57.877237: step: 388/77, loss: 0.12898707389831543
==================================================
Loss: 0.485
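The run was launched with `--accumulate_step 4`, and the step counter above advances in increments of 4; since the counter also runs past the displayed total of 77, the exact bookkeeping is internal to train.py. A generic sketch of gradient accumulation with that setting (toy model and data standing in for the real XLM-R template model; not the script's own loop):

```python
import torch
from torch import nn

# Toy stand-ins for the real model, optimizer, and dataloader.
model = nn.Linear(8, 2)
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-4)
batches = [(torch.randn(10, 8), torch.randint(0, 2, (10,)))
           for _ in range(16)]
accumulate_step = 4  # matches --accumulate_step 4

optimizer.zero_grad()
for step, (x, y) in enumerate(batches, start=1):
    loss = nn.functional.cross_entropy(model(x), y)
    # Scale so the accumulated gradient averages over the micro-batches.
    (loss / accumulate_step).backward()
    if step % accumulate_step == 0:
        optimizer.step()          # one weight update per 4 micro-batches
        optimizer.zero_grad()
```

With `--batch_size 10` this yields an effective batch of 40 examples per optimizer update.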
--------------------
Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
New best chinese model...
New best korean model...
New best russian model...
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
--------------------
Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
--------------------
Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
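Each evaluation line reports precision, recall, and F1 for templates and slots, plus a single `combined` number. F1 is the harmonic mean of p and r; how `combined` is derived from the two sub-scores is not recoverable from this log (every value here is 0.0), so the weighting in the sketch below is a placeholder assumption:

```python
def f1(p: float, r: float) -> float:
    # Harmonic mean of precision and recall, guarding the 0/0 case
    # that produces the 0.0 scores seen above.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined(template: dict, slot: dict) -> float:
    # Placeholder: the log does not reveal the actual weighting between
    # template F1 and slot F1; an unweighted mean is one plausible choice.
    return (template['f1'] + slot['f1']) / 2

score = {'template': {'p': 0.0, 'r': 0.0, 'f1': f1(0.0, 0.0)},
         'slot': {'p': 0.0, 'r': 0.0, 'f1': f1(0.0, 0.0)},
         'epoch': 0}
score['combined'] = combined(score['template'], score['slot'])
```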
******************************
Epoch: 1
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-22 07:12:51.304624: step: 4/77, loss: 0.11158759146928787
2023-01-22 07:12:52.572935: step: 8/77, loss: 0.19867026805877686
2023-01-22 07:12:53.851427: step: 12/77, loss: 0.058240726590156555
2023-01-22 07:12:55.164082: step: 16/77, loss: 0.20178799331188202
2023-01-22 07:12:56.463199: step: 20/77, loss: 0.02973691187798977
2023-01-22 07:12:57.759988: step: 24/77, loss: 0.0995328426361084
2023-01-22 07:12:59.069632: step: 28/77, loss: 0.1865939050912857
2023-01-22 07:13:00.378494: step: 32/77, loss: 0.1807020604610443
2023-01-22 07:13:01.666172: step: 36/77, loss: 0.0407174676656723
2023-01-22 07:13:02.963827: step: 40/77, loss: 0.10385138541460037
2023-01-22 07:13:04.232251: step: 44/77, loss: 0.08694268763065338
2023-01-22 07:13:05.510799: step: 48/77, loss: 0.09504902362823486
2023-01-22 07:13:06.789686: step: 52/77, loss: 0.10106496512889862
2023-01-22 07:13:08.068574: step: 56/77, loss: 0.05474689230322838
2023-01-22 07:13:09.344572: step: 60/77, loss: 0.1368316113948822
2023-01-22 07:13:10.639229: step: 64/77, loss: 0.13929077982902527
2023-01-22 07:13:11.965602: step: 68/77, loss: 0.2764717638492584
2023-01-22 07:13:13.234057: step: 72/77, loss: 0.08261242508888245
2023-01-22 07:13:14.478464: step: 76/77, loss: 0.1324632167816162
2023-01-22 07:13:15.783296: step: 80/77, loss: 0.15174496173858643
2023-01-22 07:13:17.098648: step: 84/77, loss: 0.11399096250534058
2023-01-22 07:13:18.347946: step: 88/77, loss: 0.07976777851581573
2023-01-22 07:13:19.649622: step: 92/77, loss: 0.04927085340023041
2023-01-22 07:13:20.911768: step: 96/77, loss: 0.1352429986000061
2023-01-22 07:13:22.187729: step: 100/77, loss: 0.07732612639665604
2023-01-22 07:13:23.557105: step: 104/77, loss: 0.09204569458961487
2023-01-22 07:13:24.845557: step: 108/77, loss: 0.14470499753952026
2023-01-22 07:13:26.134212: step: 112/77, loss: 0.08708120137453079
2023-01-22 07:13:27.438934: step: 116/77, loss: 0.21689417958259583
2023-01-22 07:13:28.747367: step: 120/77, loss: 0.18309366703033447
2023-01-22 07:13:30.040661: step: 124/77, loss: 0.10907326638698578
2023-01-22 07:13:31.321431: step: 128/77, loss: 0.06838659197092056
2023-01-22 07:13:32.581904: step: 132/77, loss: 0.09328626841306686
2023-01-22 07:13:33.849009: step: 136/77, loss: 0.06526198983192444
2023-01-22 07:13:35.128933: step: 140/77, loss: 0.10818645358085632
2023-01-22 07:13:36.412223: step: 144/77, loss: 0.1860373616218567
2023-01-22 07:13:37.673045: step: 148/77, loss: 0.07784507423639297
2023-01-22 07:13:38.964648: step: 152/77, loss: 0.08227992057800293
2023-01-22 07:13:40.224155: step: 156/77, loss: 0.08509774506092072
2023-01-22 07:13:41.495600: step: 160/77, loss: 0.04389530420303345
2023-01-22 07:13:42.779313: step: 164/77, loss: 0.08508370816707611
2023-01-22 07:13:44.096387: step: 168/77, loss: 0.10564348101615906
2023-01-22 07:13:45.403791: step: 172/77, loss: 0.20949102938175201
2023-01-22 07:13:46.701030: step: 176/77, loss: 0.054892972111701965
2023-01-22 07:13:48.024495: step: 180/77, loss: 0.2804702818393707
2023-01-22 07:13:49.236569: step: 184/77, loss: 0.029961321502923965
2023-01-22 07:13:50.576936: step: 188/77, loss: 0.03746054694056511
2023-01-22 07:13:51.869693: step: 192/77, loss: 0.10914406180381775
2023-01-22 07:13:53.177347: step: 196/77, loss: 0.04498276859521866
2023-01-22 07:13:54.437434: step: 200/77, loss: 0.08990252017974854
2023-01-22 07:13:55.700840: step: 204/77, loss: 0.09215635806322098
2023-01-22 07:13:56.988631: step: 208/77, loss: 0.2269093245267868
2023-01-22 07:13:58.287910: step: 212/77, loss: 0.25756344199180603
2023-01-22 07:13:59.591185: step: 216/77, loss: 0.1051335409283638
2023-01-22 07:14:00.910205: step: 220/77, loss: 0.07337190210819244
2023-01-22 07:14:02.150067: step: 224/77, loss: 0.10456050932407379
2023-01-22 07:14:03.460903: step: 228/77, loss: 0.07801325619220734
2023-01-22 07:14:04.784543: step: 232/77, loss: 0.13213995099067688
2023-01-22 07:14:06.072220: step: 236/77, loss: 0.10130394995212555
2023-01-22 07:14:07.387746: step: 240/77, loss: 0.16945858299732208
2023-01-22 07:14:08.656009: step: 244/77, loss: 0.03165304660797119
2023-01-22 07:14:09.889528: step: 248/77, loss: 0.10367722809314728
2023-01-22 07:14:11.121425: step: 252/77, loss: 0.12703952193260193
2023-01-22 07:14:12.426518: step: 256/77, loss: 0.0646631270647049
2023-01-22 07:14:13.731322: step: 260/77, loss: 0.09364254027605057
2023-01-22 07:14:15.007351: step: 264/77, loss: 0.09511947631835938
2023-01-22 07:14:16.287897: step: 268/77, loss: 0.08383557200431824
2023-01-22 07:14:17.585162: step: 272/77, loss: 0.07730644941329956
2023-01-22 07:14:18.886866: step: 276/77, loss: 0.1986403912305832
2023-01-22 07:14:20.216773: step: 280/77, loss: 0.056749798357486725
2023-01-22 07:14:21.538121: step: 284/77, loss: 0.07241851091384888
2023-01-22 07:14:22.853277: step: 288/77, loss: 0.048592459410429
2023-01-22 07:14:24.177886: step: 292/77, loss: 0.1085418313741684
2023-01-22 07:14:25.505806: step: 296/77, loss: 0.06122410297393799
2023-01-22 07:14:26.827861: step: 300/77, loss: 0.07612358033657074
2023-01-22 07:14:28.132983: step: 304/77, loss: 0.047660645097494125
2023-01-22 07:14:29.396388: step: 308/77, loss: 0.08316922932863235
2023-01-22 07:14:30.667191: step: 312/77, loss: 0.11238834261894226
2023-01-22 07:14:31.971750: step: 316/77, loss: 0.1417194902896881
2023-01-22 07:14:33.224471: step: 320/77, loss: 0.12480755150318146
2023-01-22 07:14:34.500577: step: 324/77, loss: 0.05285458266735077
2023-01-22 07:14:35.808600: step: 328/77, loss: 0.16706568002700806
2023-01-22 07:14:37.084689: step: 332/77, loss: 0.13593162596225739
2023-01-22 07:14:38.425276: step: 336/77, loss: 0.07430271804332733
2023-01-22 07:14:39.728013: step: 340/77, loss: 0.05810857564210892
2023-01-22 07:14:41.027047: step: 344/77, loss: 0.1264181137084961
2023-01-22 07:14:42.319525: step: 348/77, loss: 0.05887303501367569
2023-01-22 07:14:43.604132: step: 352/77, loss: 0.1920332908630371
2023-01-22 07:14:44.904111: step: 356/77, loss: 0.03397180885076523
2023-01-22 07:14:46.205197: step: 360/77, loss: 0.061011020094156265
2023-01-22 07:14:47.489247: step: 364/77, loss: 0.06792081892490387
2023-01-22 07:14:48.776268: step: 368/77, loss: 0.10783128440380096
2023-01-22 07:14:50.113828: step: 372/77, loss: 0.05417155474424362
2023-01-22 07:14:51.433025: step: 376/77, loss: 0.0812927782535553
2023-01-22 07:14:52.746640: step: 380/77, loss: 0.06356360018253326
2023-01-22 07:14:54.040895: step: 384/77, loss: 0.07857591658830643
2023-01-22 07:14:55.346960: step: 388/77, loss: 0.05766603723168373
==================================================
Loss: 0.106
--------------------
Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
--------------------
Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
--------------------
Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
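The "New best ... model" lines and the "Current best result" block indicate that one best checkpoint is tracked per evaluation language, keyed on the dev score; note that epoch 1 (with the same 0.0 dev score) did not displace the epoch-0 best, so the comparison is evidently strict. A minimal sketch of that bookkeeping (names hypothetical, not the script's own code):

```python
best = {}  # language -> {'dev': ..., 'test': ...}

def update_best(language: str, dev: dict, test: dict) -> None:
    # Keep the epoch with the strictly highest dev 'combined' score for
    # each language; ties (e.g. epoch 1's 0.0 here) keep the earlier epoch.
    prev = best.get(language)
    if prev is None or dev['combined'] > prev['dev']['combined']:
        best[language] = {'dev': dev, 'test': test}
        print(f'New best {language} model...')
```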
0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 2 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:16:31.824762: step: 4/77, loss: 0.11322622001171112 2023-01-22 07:16:33.138290: step: 8/77, loss: 0.10179299116134644 2023-01-22 07:16:34.426109: step: 12/77, loss: 0.06799320876598358 2023-01-22 07:16:35.689018: step: 16/77, loss: 0.15302878618240356 2023-01-22 07:16:36.951790: step: 20/77, loss: 0.11456996202468872 2023-01-22 07:16:38.275036: step: 24/77, loss: 0.06971030682325363 2023-01-22 07:16:39.572398: step: 28/77, loss: 0.09485167264938354 2023-01-22 07:16:40.872259: step: 32/77, loss: 0.09777399897575378 2023-01-22 07:16:42.170731: step: 36/77, loss: 0.04542500525712967 2023-01-22 07:16:43.470850: step: 40/77, loss: 0.10130670666694641 2023-01-22 07:16:44.764773: step: 44/77, loss: 0.04343476518988609 2023-01-22 07:16:46.059922: step: 48/77, loss: 0.2175489366054535 2023-01-22 07:16:47.341241: step: 52/77, loss: 0.04697784036397934 2023-01-22 07:16:48.623007: step: 56/77, loss: 0.11778730154037476 2023-01-22 07:16:49.885134: step: 60/77, loss: 0.056548308581113815 2023-01-22 07:16:51.168955: step: 64/77, loss: 0.01581401564180851 2023-01-22 07:16:52.422405: step: 68/77, loss: 0.05202740058302879 2023-01-22 07:16:53.656251: step: 72/77, loss: 0.06093277037143707 2023-01-22 07:16:54.927276: step: 76/77, loss: 0.02446526102721691 2023-01-22 07:16:56.218287: step: 80/77, loss: 0.06041073054075241 2023-01-22 07:16:57.464448: step: 84/77, loss: 0.04046632722020149 2023-01-22 07:16:58.759467: step: 88/77, loss: 0.07183364033699036 2023-01-22 07:17:00.065777: step: 92/77, loss: 0.10985290259122849 2023-01-22 07:17:01.327008: step: 96/77, loss: 0.06694649904966354 2023-01-22 07:17:02.648753: step: 100/77, loss: 0.04028739407658577 2023-01-22 07:17:03.900229: step: 104/77, loss: 0.07630635052919388 2023-01-22 07:17:05.204201: step: 108/77, loss: 0.03472098335623741 2023-01-22 07:17:06.478627: step: 112/77, loss: 0.12241032719612122 2023-01-22 07:17:07.791635: step: 116/77, loss: 0.02490338310599327 2023-01-22 07:17:09.089799: step: 120/77, loss: 0.1250462681055069 2023-01-22 07:17:10.379363: step: 124/77, loss: 0.05550193041563034 2023-01-22 07:17:11.683331: step: 128/77, loss: 0.01295685488730669 2023-01-22 07:17:12.962672: step: 132/77, loss: 0.03971627354621887 2023-01-22 07:17:14.241807: step: 136/77, loss: 0.10844653099775314 2023-01-22 07:17:15.537563: step: 140/77, loss: 0.0835656076669693 2023-01-22 07:17:16.867004: step: 
144/77, loss: 0.04878070205450058 2023-01-22 07:17:18.158366: step: 148/77, loss: 0.0661730170249939 2023-01-22 07:17:19.427284: step: 152/77, loss: 0.07241643965244293 2023-01-22 07:17:20.705577: step: 156/77, loss: 0.01513383537530899 2023-01-22 07:17:22.013018: step: 160/77, loss: 0.021579544991254807 2023-01-22 07:17:23.343926: step: 164/77, loss: 0.04571843147277832 2023-01-22 07:17:24.664532: step: 168/77, loss: 0.05205213278532028 2023-01-22 07:17:25.957647: step: 172/77, loss: 0.07319924980401993 2023-01-22 07:17:27.235476: step: 176/77, loss: 0.0070552583783864975 2023-01-22 07:17:28.519937: step: 180/77, loss: 0.03356878086924553 2023-01-22 07:17:29.780698: step: 184/77, loss: 0.017069164663553238 2023-01-22 07:17:31.100782: step: 188/77, loss: 0.06316450238227844 2023-01-22 07:17:32.315923: step: 192/77, loss: 0.05764845013618469 2023-01-22 07:17:33.620968: step: 196/77, loss: 0.018638404086232185 2023-01-22 07:17:34.904076: step: 200/77, loss: 0.047206778079271317 2023-01-22 07:17:36.132279: step: 204/77, loss: 0.012387819588184357 2023-01-22 07:17:37.411862: step: 208/77, loss: 0.018886670470237732 2023-01-22 07:17:38.687323: step: 212/77, loss: 0.040599655359983444 2023-01-22 07:17:39.964207: step: 216/77, loss: 0.062156952917575836 2023-01-22 07:17:41.316056: step: 220/77, loss: 0.08114401996135712 2023-01-22 07:17:42.669059: step: 224/77, loss: 0.03974404186010361 2023-01-22 07:17:43.981272: step: 228/77, loss: 0.07013162225484848 2023-01-22 07:17:45.270549: step: 232/77, loss: 0.029353676363825798 2023-01-22 07:17:46.573436: step: 236/77, loss: 0.027635207399725914 2023-01-22 07:17:47.867944: step: 240/77, loss: 0.08846844732761383 2023-01-22 07:17:49.168401: step: 244/77, loss: 0.11539971828460693 2023-01-22 07:17:50.467859: step: 248/77, loss: 0.037433214485645294 2023-01-22 07:17:51.774448: step: 252/77, loss: 0.07446795701980591 2023-01-22 07:17:53.097696: step: 256/77, loss: 0.039659544825553894 2023-01-22 07:17:54.443006: step: 260/77, loss: 0.01527109369635582 2023-01-22 07:17:55.750717: step: 264/77, loss: 0.23154425621032715 2023-01-22 07:17:57.021907: step: 268/77, loss: 0.10503697395324707 2023-01-22 07:17:58.302378: step: 272/77, loss: 0.041720081120729446 2023-01-22 07:17:59.596289: step: 276/77, loss: 0.08117479085922241 2023-01-22 07:18:00.913164: step: 280/77, loss: 0.06630122661590576 2023-01-22 07:18:02.230402: step: 284/77, loss: 0.1733831912279129 2023-01-22 07:18:03.587041: step: 288/77, loss: 0.0781242698431015 2023-01-22 07:18:04.895828: step: 292/77, loss: 0.06188240647315979 2023-01-22 07:18:06.224764: step: 296/77, loss: 0.051947221159935 2023-01-22 07:18:07.512660: step: 300/77, loss: 0.040972497314214706 2023-01-22 07:18:08.847469: step: 304/77, loss: 0.03663957864046097 2023-01-22 07:18:10.177692: step: 308/77, loss: 0.07159535586833954 2023-01-22 07:18:11.526095: step: 312/77, loss: 0.15917694568634033 2023-01-22 07:18:12.871961: step: 316/77, loss: 0.037154968827962875 2023-01-22 07:18:14.115733: step: 320/77, loss: 0.07271397113800049 2023-01-22 07:18:15.400250: step: 324/77, loss: 0.1220267117023468 2023-01-22 07:18:16.691924: step: 328/77, loss: 0.03455435484647751 2023-01-22 07:18:18.018368: step: 332/77, loss: 0.019315311685204506 2023-01-22 07:18:19.306317: step: 336/77, loss: 0.05523783713579178 2023-01-22 07:18:20.680397: step: 340/77, loss: 0.03926079720258713 2023-01-22 07:18:21.992537: step: 344/77, loss: 0.10926296561956406 2023-01-22 07:18:23.301361: step: 348/77, loss: 0.0364389531314373 2023-01-22 07:18:24.578839: step: 352/77, 
loss: 0.015954257920384407 2023-01-22 07:18:25.886224: step: 356/77, loss: 0.25136813521385193 2023-01-22 07:18:27.238750: step: 360/77, loss: 0.013067019172012806 2023-01-22 07:18:28.562512: step: 364/77, loss: 0.005912239663302898 2023-01-22 07:18:29.908366: step: 368/77, loss: 0.06499234586954117 2023-01-22 07:18:31.157606: step: 372/77, loss: 0.06031108647584915 2023-01-22 07:18:32.449688: step: 376/77, loss: 0.07589991390705109 2023-01-22 07:18:33.762795: step: 380/77, loss: 0.01958923414349556 2023-01-22 07:18:35.062942: step: 384/77, loss: 0.01762222871184349 2023-01-22 07:18:36.394861: step: 388/77, loss: 0.008108420297503471 ================================================== Loss: 0.065 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test Chinese: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test Korean: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test Russian: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} New best chinese model... New best korean model... New best russian model... 
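The epoch-2 evaluation above is the first with nonzero scores, and its numbers are internally consistent: each 'f1' is the harmonic mean of the reported 'p' and 'r', and 'combined' is the product of the template and slot F1 scores. A minimal sketch reproducing the Dev Chinese values (the helper below is ours, not taken from train.py):

def f1(p, r):
    # Harmonic mean of precision and recall; defined as 0.0 when p + r == 0.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

template_f1 = f1(1.0, 0.5666666666666667)    # -> 0.7234042553191489
slot_f1 = f1(0.5, 0.035916824196597356)      # -> 0.0670194003527337
combined = template_f1 * slot_f1             # -> 0.048482119404105226
print(template_f1, slot_f1, combined)

The same product relation holds for the other dicts in this log, e.g. Test Chinese: 0.7142857142857142 * 0.03008849557522124 = 0.021491782553729452.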
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test for Chinese: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test for Korean: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test for Russian: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:20:40.629528: step: 4/77, loss: 0.015968766063451767 2023-01-22 07:20:41.917069: step: 8/77, loss: 0.01351113896816969 2023-01-22 07:20:43.209912: step: 12/77, loss: 0.08549536019563675 2023-01-22 07:20:44.462411: step: 16/77, loss: 0.03732670471072197 2023-01-22 07:20:45.773112: step: 20/77, loss: 0.005760162137448788 2023-01-22 07:20:47.067329: step: 24/77, loss: 0.06140680983662605 2023-01-22 07:20:48.431221: step: 28/77, loss: 0.009903283789753914 2023-01-22 07:20:49.688448: step: 32/77, loss: 0.028609666973352432 2023-01-22 07:20:50.984214: step: 36/77, loss: 0.025533368811011314 2023-01-22 07:20:52.267826: step: 40/77, loss: 0.02272067219018936 2023-01-22 07:20:53.589286: step: 44/77, loss: 0.06468820571899414 2023-01-22 07:20:54.874754: step: 48/77, loss: 0.026076534762978554 2023-01-22 07:20:56.174284: step: 52/77, loss: 0.029940230771899223 2023-01-22 07:20:57.490804: step: 56/77, loss: 0.08760607987642288 2023-01-22 07:20:58.808044: step: 60/77, loss: 0.12449557334184647 2023-01-22 07:21:00.119745: step: 64/77, loss: 0.02922476828098297 2023-01-22 07:21:01.445165: step: 68/77, loss: 0.005582435987889767 2023-01-22 07:21:02.715375: step: 72/77, loss: 0.012879885733127594 2023-01-22 07:21:03.984803: step: 76/77, loss: 0.025143388658761978 2023-01-22 07:21:05.270241: step: 80/77, loss: 
0.009446179494261742 2023-01-22 07:21:06.519298: step: 84/77, loss: 0.04552457481622696 2023-01-22 07:21:07.803890: step: 88/77, loss: 0.02507692202925682 2023-01-22 07:21:09.111768: step: 92/77, loss: 0.025074133649468422 2023-01-22 07:21:10.417681: step: 96/77, loss: 0.01472495123744011 2023-01-22 07:21:11.735012: step: 100/77, loss: 0.03009127266705036 2023-01-22 07:21:13.020047: step: 104/77, loss: 0.1456586718559265 2023-01-22 07:21:14.331133: step: 108/77, loss: 0.008024273440241814 2023-01-22 07:21:15.624297: step: 112/77, loss: 0.04816785454750061 2023-01-22 07:21:16.940765: step: 116/77, loss: 0.01610010489821434 2023-01-22 07:21:18.197987: step: 120/77, loss: 0.022976523265242577 2023-01-22 07:21:19.463048: step: 124/77, loss: 0.040475912392139435 2023-01-22 07:21:20.732289: step: 128/77, loss: 0.055941663682460785 2023-01-22 07:21:21.990709: step: 132/77, loss: 0.027231454849243164 2023-01-22 07:21:23.278867: step: 136/77, loss: 0.06407906860113144 2023-01-22 07:21:24.596408: step: 140/77, loss: 0.020403718575835228 2023-01-22 07:21:25.857862: step: 144/77, loss: 0.054121024906635284 2023-01-22 07:21:27.064971: step: 148/77, loss: 0.019535942003130913 2023-01-22 07:21:28.358064: step: 152/77, loss: 0.02978493645787239 2023-01-22 07:21:29.643954: step: 156/77, loss: 0.007111942861229181 2023-01-22 07:21:30.934531: step: 160/77, loss: 0.05967719852924347 2023-01-22 07:21:32.216050: step: 164/77, loss: 0.050702475011348724 2023-01-22 07:21:33.565750: step: 168/77, loss: 0.07628966867923737 2023-01-22 07:21:34.859738: step: 172/77, loss: 0.016380734741687775 2023-01-22 07:21:36.194216: step: 176/77, loss: 0.09666801989078522 2023-01-22 07:21:37.494267: step: 180/77, loss: 0.011515635997056961 2023-01-22 07:21:38.826383: step: 184/77, loss: 0.014797764830291271 2023-01-22 07:21:40.131410: step: 188/77, loss: 0.014288538135588169 2023-01-22 07:21:41.428010: step: 192/77, loss: 0.046548232436180115 2023-01-22 07:21:42.744808: step: 196/77, loss: 0.10158401727676392 2023-01-22 07:21:44.078268: step: 200/77, loss: 0.020895320922136307 2023-01-22 07:21:45.396368: step: 204/77, loss: 0.0929812490940094 2023-01-22 07:21:46.672874: step: 208/77, loss: 0.06224067509174347 2023-01-22 07:21:48.013697: step: 212/77, loss: 0.010834470391273499 2023-01-22 07:21:49.286548: step: 216/77, loss: 0.020457429811358452 2023-01-22 07:21:50.581946: step: 220/77, loss: 0.04090658575296402 2023-01-22 07:21:51.864870: step: 224/77, loss: 0.0852619856595993 2023-01-22 07:21:53.144641: step: 228/77, loss: 0.054530490189790726 2023-01-22 07:21:54.491347: step: 232/77, loss: 0.04404990002512932 2023-01-22 07:21:55.797589: step: 236/77, loss: 0.028828389942646027 2023-01-22 07:21:57.066485: step: 240/77, loss: 0.05302087962627411 2023-01-22 07:21:58.368443: step: 244/77, loss: 0.051580313593149185 2023-01-22 07:21:59.680765: step: 248/77, loss: 0.14568468928337097 2023-01-22 07:22:00.966955: step: 252/77, loss: 0.009636200964450836 2023-01-22 07:22:02.267140: step: 256/77, loss: 0.012510381639003754 2023-01-22 07:22:03.561194: step: 260/77, loss: 0.02064824104309082 2023-01-22 07:22:04.886529: step: 264/77, loss: 0.08655968308448792 2023-01-22 07:22:06.214054: step: 268/77, loss: 0.03449642285704613 2023-01-22 07:22:07.490488: step: 272/77, loss: 0.021431952714920044 2023-01-22 07:22:08.807018: step: 276/77, loss: 0.016396211460232735 2023-01-22 07:22:10.085564: step: 280/77, loss: 0.015242512337863445 2023-01-22 07:22:11.400825: step: 284/77, loss: 0.3615311086177826 2023-01-22 07:22:12.673805: step: 288/77, 
loss: 0.08149930089712143 2023-01-22 07:22:14.008222: step: 292/77, loss: 0.026354094967246056 2023-01-22 07:22:15.310607: step: 296/77, loss: 0.008077921345829964 2023-01-22 07:22:16.624824: step: 300/77, loss: 0.13474395871162415 2023-01-22 07:22:17.946512: step: 304/77, loss: 0.005618092138320208 2023-01-22 07:22:19.237194: step: 308/77, loss: 0.21946494281291962 2023-01-22 07:22:20.533978: step: 312/77, loss: 0.04267258942127228 2023-01-22 07:22:21.752196: step: 316/77, loss: 0.015815965831279755 2023-01-22 07:22:23.090779: step: 320/77, loss: 0.03485114499926567 2023-01-22 07:22:24.449222: step: 324/77, loss: 0.026810774579644203 2023-01-22 07:22:25.744959: step: 328/77, loss: 0.01187300868332386 2023-01-22 07:22:27.049613: step: 332/77, loss: 0.05864952877163887 2023-01-22 07:22:28.325106: step: 336/77, loss: 0.030480477958917618 2023-01-22 07:22:29.584960: step: 340/77, loss: 0.06372788548469543 2023-01-22 07:22:30.854134: step: 344/77, loss: 0.05656794458627701 2023-01-22 07:22:32.159354: step: 348/77, loss: 0.0045874156057834625 2023-01-22 07:22:33.484614: step: 352/77, loss: 0.05805297940969467 2023-01-22 07:22:34.730961: step: 356/77, loss: 0.044932737946510315 2023-01-22 07:22:36.016318: step: 360/77, loss: 0.01705370470881462 2023-01-22 07:22:37.294174: step: 364/77, loss: 0.15946604311466217 2023-01-22 07:22:38.565880: step: 368/77, loss: 0.006360077764838934 2023-01-22 07:22:39.837731: step: 372/77, loss: 0.04374031722545624 2023-01-22 07:22:41.118232: step: 376/77, loss: 0.006530561950057745 2023-01-22 07:22:42.422119: step: 380/77, loss: 0.0038705565966665745 2023-01-22 07:22:43.705234: step: 384/77, loss: 0.06794550269842148 2023-01-22 07:22:45.014185: step: 388/77, loss: 0.15991713106632233 ================================================== Loss: 0.047 -------------------- Dev Chinese: {'template': {'p': 0.9090909090909091, 'r': 0.5, 'f1': 0.6451612903225806}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04535404501388967, 'epoch': 3} Test Chinese: {'template': {'p': 0.9466666666666667, 'r': 0.5772357723577236, 'f1': 0.7171717171717171}, 'slot': {'p': 0.4722222222222222, 'r': 0.015454545454545455, 'f1': 0.029929577464788734}, 'combined': 0.021464646464646464, 'epoch': 3} Dev Korean: {'template': {'p': 0.90625, 'r': 0.48333333333333334, 'f1': 0.6304347826086957}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04431878963857263, 'epoch': 3} Test Korean: {'template': {'p': 0.9466666666666667, 'r': 0.5772357723577236, 'f1': 0.7171717171717171}, 'slot': {'p': 0.4722222222222222, 'r': 0.015454545454545455, 'f1': 0.029929577464788734}, 'combined': 0.021464646464646464, 'epoch': 3} Dev Russian: {'template': {'p': 0.90625, 'r': 0.48333333333333334, 'f1': 0.6304347826086957}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04431878963857263, 'epoch': 3} Test Russian: {'template': {'p': 0.9466666666666667, 'r': 0.5772357723577236, 'f1': 0.7171717171717171}, 'slot': {'p': 0.4722222222222222, 'r': 0.015454545454545455, 'f1': 0.029929577464788734}, 'combined': 0.021464646464646464, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.021621621621621623, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 
'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.025806451612903226, 'epoch': 3} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test for Chinese: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test for Korean: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test for Russian: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 4 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:24:20.534839: step: 4/77, loss: 0.1591966152191162 2023-01-22 07:24:21.788228: step: 8/77, loss: 0.0036322043742984533 2023-01-22 07:24:23.056355: step: 12/77, loss: 0.0035585726145654917 2023-01-22 07:24:24.284628: step: 16/77, loss: 0.03981819003820419 2023-01-22 07:24:25.587856: step: 20/77, loss: 0.012272844091057777 2023-01-22 07:24:26.888611: step: 24/77, loss: 0.03815988078713417 2023-01-22 07:24:28.187457: step: 28/77, loss: 0.07955527305603027 2023-01-22 07:24:29.532591: step: 32/77, loss: 0.03334498777985573 2023-01-22 07:24:30.854133: step: 36/77, loss: 0.02782886102795601 2023-01-22 07:24:32.116594: step: 40/77, loss: 0.05034760758280754 2023-01-22 07:24:33.442934: step: 44/77, loss: 0.046723365783691406 2023-01-22 07:24:34.683799: step: 48/77, loss: 0.04478670284152031 2023-01-22 07:24:35.960904: step: 52/77, loss: 0.00868067517876625 2023-01-22 07:24:37.192269: step: 56/77, loss: 0.0023823282681405544 2023-01-22 07:24:38.471462: step: 60/77, loss: 0.12383562326431274 2023-01-22 07:24:39.752352: step: 64/77, loss: 0.016625147312879562 2023-01-22 07:24:41.035846: step: 68/77, loss: 0.09449809789657593 2023-01-22 07:24:42.318146: step: 72/77, loss: 0.026286687701940536 
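With --batch_size 10 and --accumulate_step 4, the step counter above advancing by 4 matches a loop that accumulates gradients over four batches (an effective batch of 40) and logs at each optimizer update. A hedged sketch of such a loop; the names (model, loader, optimizer) are illustrative rather than from train.py, and the counter running past its '/77' total suggests the displayed denominator tracks something other than these logged steps:

def train_epoch(model, loader, optimizer, accumulate_step=4):
    # Accumulate gradients over `accumulate_step` batches before each
    # update, so batch_size 10 behaves like an effective batch of 40.
    model.train()
    optimizer.zero_grad()
    for i, batch in enumerate(loader, start=1):
        loss = model(**batch)                # assumed to return a scalar loss
        (loss / accumulate_step).backward()  # scale so the update averages
        if i % accumulate_step == 0:         # fires at batch 4, 8, 12, ...
            optimizer.step()
            optimizer.zero_grad()
            print(f"step: {i}, loss: {loss.item()}")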
2023-01-22 07:24:43.557301: step: 76/77, loss: 0.010752486996352673 2023-01-22 07:24:44.868039: step: 80/77, loss: 0.025785956531763077 2023-01-22 07:24:46.119210: step: 84/77, loss: 0.00048810700536705554 2023-01-22 07:24:47.396784: step: 88/77, loss: 0.03618944063782692 2023-01-22 07:24:48.661484: step: 92/77, loss: 0.010275091975927353 2023-01-22 07:24:49.955103: step: 96/77, loss: 0.019382059574127197 2023-01-22 07:24:51.250733: step: 100/77, loss: 0.022107046097517014 2023-01-22 07:24:52.526220: step: 104/77, loss: 0.03271938115358353 2023-01-22 07:24:53.850188: step: 108/77, loss: 0.02639208734035492 2023-01-22 07:24:55.110411: step: 112/77, loss: 0.07708106935024261 2023-01-22 07:24:56.364082: step: 116/77, loss: 0.01799575239419937 2023-01-22 07:24:57.619258: step: 120/77, loss: 0.035872627049684525 2023-01-22 07:24:58.892683: step: 124/77, loss: 0.005020597018301487 2023-01-22 07:25:00.192560: step: 128/77, loss: 0.048189785331487656 2023-01-22 07:25:01.525789: step: 132/77, loss: 0.012047644704580307 2023-01-22 07:25:02.854661: step: 136/77, loss: 0.06587105244398117 2023-01-22 07:25:04.172920: step: 140/77, loss: 0.17298981547355652 2023-01-22 07:25:05.477652: step: 144/77, loss: 0.03656620532274246 2023-01-22 07:25:06.751406: step: 148/77, loss: 0.03557945042848587 2023-01-22 07:25:08.066896: step: 152/77, loss: 0.0015012272633612156 2023-01-22 07:25:09.348512: step: 156/77, loss: 0.011846780776977539 2023-01-22 07:25:10.626304: step: 160/77, loss: 0.06310739368200302 2023-01-22 07:25:11.901145: step: 164/77, loss: 0.016693251207470894 2023-01-22 07:25:13.206302: step: 168/77, loss: 0.03192953020334244 2023-01-22 07:25:14.484245: step: 172/77, loss: 0.027641138061881065 2023-01-22 07:25:15.798326: step: 176/77, loss: 0.03455713763833046 2023-01-22 07:25:17.103372: step: 180/77, loss: 0.02429923042654991 2023-01-22 07:25:18.408064: step: 184/77, loss: 0.004264523275196552 2023-01-22 07:25:19.721598: step: 188/77, loss: 0.014208164997398853 2023-01-22 07:25:20.997607: step: 192/77, loss: 0.01190916821360588 2023-01-22 07:25:22.275915: step: 196/77, loss: 0.056420039385557175 2023-01-22 07:25:23.584339: step: 200/77, loss: 0.018400974571704865 2023-01-22 07:25:24.911196: step: 204/77, loss: 0.04778168350458145 2023-01-22 07:25:26.179593: step: 208/77, loss: 0.011265124194324017 2023-01-22 07:25:27.452886: step: 212/77, loss: 0.005715106148272753 2023-01-22 07:25:28.727098: step: 216/77, loss: 0.012743368744850159 2023-01-22 07:25:29.985052: step: 220/77, loss: 0.03328631818294525 2023-01-22 07:25:31.243421: step: 224/77, loss: 0.13442906737327576 2023-01-22 07:25:32.532398: step: 228/77, loss: 0.03768271580338478 2023-01-22 07:25:33.850511: step: 232/77, loss: 0.15616267919540405 2023-01-22 07:25:35.152378: step: 236/77, loss: 0.010896775871515274 2023-01-22 07:25:36.413886: step: 240/77, loss: 0.02437608689069748 2023-01-22 07:25:37.719024: step: 244/77, loss: 0.0066629135981202126 2023-01-22 07:25:39.034489: step: 248/77, loss: 0.014826871454715729 2023-01-22 07:25:40.348677: step: 252/77, loss: 0.06286932528018951 2023-01-22 07:25:41.589428: step: 256/77, loss: 0.03753984346985817 2023-01-22 07:25:42.852154: step: 260/77, loss: 0.024473311379551888 2023-01-22 07:25:44.158306: step: 264/77, loss: 0.02880875952541828 2023-01-22 07:25:45.471597: step: 268/77, loss: 0.0316791832447052 2023-01-22 07:25:46.742312: step: 272/77, loss: 0.03705769032239914 2023-01-22 07:25:48.033052: step: 276/77, loss: 0.0716656893491745 2023-01-22 07:25:49.331319: step: 280/77, loss: 
0.011389000341296196 2023-01-22 07:25:50.631225: step: 284/77, loss: 0.027336813509464264 2023-01-22 07:25:51.877893: step: 288/77, loss: 0.033267825841903687 2023-01-22 07:25:53.192187: step: 292/77, loss: 0.026131270453333855 2023-01-22 07:25:54.456249: step: 296/77, loss: 0.00949370302259922 2023-01-22 07:25:55.751056: step: 300/77, loss: 0.02307109721004963 2023-01-22 07:25:57.044198: step: 304/77, loss: 0.056111834943294525 2023-01-22 07:25:58.377015: step: 308/77, loss: 0.05641761049628258 2023-01-22 07:25:59.628911: step: 312/77, loss: 0.02172068879008293 2023-01-22 07:26:00.966678: step: 316/77, loss: 0.1652591973543167 2023-01-22 07:26:02.315225: step: 320/77, loss: 0.021472467109560966 2023-01-22 07:26:03.632004: step: 324/77, loss: 0.012137623503804207 2023-01-22 07:26:04.909829: step: 328/77, loss: 0.01814662106335163 2023-01-22 07:26:06.228860: step: 332/77, loss: 0.020530683919787407 2023-01-22 07:26:07.514531: step: 336/77, loss: 0.018007539212703705 2023-01-22 07:26:08.771972: step: 340/77, loss: 0.0415189191699028 2023-01-22 07:26:10.057627: step: 344/77, loss: 0.01093345694243908 2023-01-22 07:26:11.363958: step: 348/77, loss: 0.026807358488440514 2023-01-22 07:26:12.624999: step: 352/77, loss: 0.013095011003315449 2023-01-22 07:26:13.910864: step: 356/77, loss: 0.02017892897129059 2023-01-22 07:26:15.270474: step: 360/77, loss: 0.06183590739965439 2023-01-22 07:26:16.533523: step: 364/77, loss: 0.11347439140081406 2023-01-22 07:26:17.830799: step: 368/77, loss: 0.08657459169626236 2023-01-22 07:26:19.175620: step: 372/77, loss: 0.02488943189382553 2023-01-22 07:26:20.409051: step: 376/77, loss: 0.002923357766121626 2023-01-22 07:26:21.704830: step: 380/77, loss: 0.0434151217341423 2023-01-22 07:26:22.975274: step: 384/77, loss: 0.06214475631713867 2023-01-22 07:26:24.289003: step: 388/77, loss: 0.043125901371240616 ================================================== Loss: 0.038 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 4} Test Chinese: {'template': {'p': 0.9846153846153847, 'r': 0.5203252032520326, 'f1': 0.6808510638297873}, 'slot': {'p': 0.4838709677419355, 'r': 0.013636363636363636, 'f1': 0.026525198938992044}, 'combined': 0.01805970991590948, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 4} Test Korean: {'template': {'p': 0.9846153846153847, 'r': 0.5203252032520326, 'f1': 0.6808510638297873}, 'slot': {'p': 0.4838709677419355, 'r': 0.013636363636363636, 'f1': 0.026525198938992044}, 'combined': 0.01805970991590948, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 4} Test Russian: {'template': {'p': 0.9848484848484849, 'r': 0.5284552845528455, 'f1': 0.6878306878306879}, 'slot': {'p': 0.5151515151515151, 'r': 0.015454545454545455, 'f1': 0.030008826125330977}, 'combined': 0.020640991514777923, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 
'combined': 0.0, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test for Chinese: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test for Korean: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 2} Test for Russian: {'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5666666666666667, 'r': 0.015454545454545455, 'f1': 0.03008849557522124}, 'combined': 0.021491782553729452, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 5 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:28:00.134919: step: 4/77, loss: 0.05281934514641762 2023-01-22 07:28:01.337419: step: 8/77, loss: 0.10034886747598648 2023-01-22 07:28:02.645760: step: 12/77, loss: 0.008094294928014278 2023-01-22 07:28:03.955427: step: 16/77, loss: 0.007729171775281429 2023-01-22 07:28:05.219569: step: 20/77, loss: 0.0249926894903183 2023-01-22 07:28:06.522479: step: 24/77, loss: 0.014677442610263824 2023-01-22 07:28:07.767331: step: 28/77, loss: 0.015226608142256737 2023-01-22 07:28:09.067048: step: 32/77, loss: 0.09592887759208679 2023-01-22 07:28:10.332054: step: 36/77, loss: 0.038398392498493195 2023-01-22 07:28:11.597521: step: 40/77, loss: 0.014437681064009666 2023-01-22 07:28:12.834711: step: 44/77, loss: 0.02251637540757656 2023-01-22 07:28:14.168194: step: 48/77, loss: 0.09548071026802063 2023-01-22 07:28:15.439371: step: 52/77, loss: 0.05408954620361328 2023-01-22 07:28:16.699185: step: 56/77, loss: 0.012811540625989437 2023-01-22 07:28:18.000942: step: 60/77, loss: 0.06582754850387573 2023-01-22 07:28:19.272775: step: 64/77, loss: 0.011044980026781559 2023-01-22 07:28:20.603695: step: 68/77, loss: 0.01578996330499649 2023-01-22 07:28:21.929544: step: 
72/77, loss: 0.03434739634394646 2023-01-22 07:28:23.238375: step: 76/77, loss: 0.02023492194712162 2023-01-22 07:28:24.516038: step: 80/77, loss: 0.005539305973798037 2023-01-22 07:28:25.763341: step: 84/77, loss: 0.012604881078004837 2023-01-22 07:28:27.050347: step: 88/77, loss: 0.04681181162595749 2023-01-22 07:28:28.361830: step: 92/77, loss: 0.02078882046043873 2023-01-22 07:28:29.670017: step: 96/77, loss: 0.040004126727581024 2023-01-22 07:28:30.905653: step: 100/77, loss: 0.034155894070863724 2023-01-22 07:28:32.224236: step: 104/77, loss: 0.01139921136200428 2023-01-22 07:28:33.554098: step: 108/77, loss: 0.006441830191761255 2023-01-22 07:28:34.806621: step: 112/77, loss: 0.0597098171710968 2023-01-22 07:28:36.079217: step: 116/77, loss: 0.012973377481102943 2023-01-22 07:28:37.346293: step: 120/77, loss: 0.07683604210615158 2023-01-22 07:28:38.646334: step: 124/77, loss: 0.053127095103263855 2023-01-22 07:28:39.942692: step: 128/77, loss: 0.01308278739452362 2023-01-22 07:28:41.203159: step: 132/77, loss: 0.01676439866423607 2023-01-22 07:28:42.491549: step: 136/77, loss: 0.11097963154315948 2023-01-22 07:28:43.778667: step: 140/77, loss: 0.04553266242146492 2023-01-22 07:28:45.061384: step: 144/77, loss: 0.05289888381958008 2023-01-22 07:28:46.343498: step: 148/77, loss: 0.01658434234559536 2023-01-22 07:28:47.610679: step: 152/77, loss: 0.07442338019609451 2023-01-22 07:28:48.933295: step: 156/77, loss: 0.05406294763088226 2023-01-22 07:28:50.200283: step: 160/77, loss: 0.010441828519105911 2023-01-22 07:28:51.450860: step: 164/77, loss: 0.0038215755484998226 2023-01-22 07:28:52.731269: step: 168/77, loss: 0.0016422360204160213 2023-01-22 07:28:54.039322: step: 172/77, loss: 0.07169732451438904 2023-01-22 07:28:55.355881: step: 176/77, loss: 0.01746036671102047 2023-01-22 07:28:56.671387: step: 180/77, loss: 0.04688744246959686 2023-01-22 07:28:58.026215: step: 184/77, loss: 0.023346032947301865 2023-01-22 07:28:59.308390: step: 188/77, loss: 0.022755956277251244 2023-01-22 07:29:00.591678: step: 192/77, loss: 0.00830506905913353 2023-01-22 07:29:01.861321: step: 196/77, loss: 0.006505207624286413 2023-01-22 07:29:03.186198: step: 200/77, loss: 0.034292690455913544 2023-01-22 07:29:04.544400: step: 204/77, loss: 0.1433955430984497 2023-01-22 07:29:05.784315: step: 208/77, loss: 0.011057563126087189 2023-01-22 07:29:07.077537: step: 212/77, loss: 0.02051542140543461 2023-01-22 07:29:08.326794: step: 216/77, loss: 0.015034375712275505 2023-01-22 07:29:09.615159: step: 220/77, loss: 0.02420666441321373 2023-01-22 07:29:10.953511: step: 224/77, loss: 0.07697256654500961 2023-01-22 07:29:12.217615: step: 228/77, loss: 0.03689153492450714 2023-01-22 07:29:13.503974: step: 232/77, loss: 0.015925971791148186 2023-01-22 07:29:14.773777: step: 236/77, loss: 0.0585017055273056 2023-01-22 07:29:16.103524: step: 240/77, loss: 0.009359755553305149 2023-01-22 07:29:17.350640: step: 244/77, loss: 0.021430548280477524 2023-01-22 07:29:18.668984: step: 248/77, loss: 0.018590224906802177 2023-01-22 07:29:19.974181: step: 252/77, loss: 0.01646292582154274 2023-01-22 07:29:21.303563: step: 256/77, loss: 0.049518853425979614 2023-01-22 07:29:22.619476: step: 260/77, loss: 0.02466382458806038 2023-01-22 07:29:23.974806: step: 264/77, loss: 0.007570900954306126 2023-01-22 07:29:25.309596: step: 268/77, loss: 0.1567116677761078 2023-01-22 07:29:26.653572: step: 272/77, loss: 0.044765837490558624 2023-01-22 07:29:27.945881: step: 276/77, loss: 0.04036088287830353 2023-01-22 07:29:29.268386: step: 
280/77, loss: 0.0008329019183292985 2023-01-22 07:29:30.587348: step: 284/77, loss: 0.12199905514717102 2023-01-22 07:29:31.865541: step: 288/77, loss: 0.045698583126068115 2023-01-22 07:29:33.221173: step: 292/77, loss: 0.02223210595548153 2023-01-22 07:29:34.526796: step: 296/77, loss: 0.07185053825378418 2023-01-22 07:29:35.854915: step: 300/77, loss: 0.025253664702177048 2023-01-22 07:29:37.124430: step: 304/77, loss: 0.035008110105991364 2023-01-22 07:29:38.405181: step: 308/77, loss: 0.07302802056074142 2023-01-22 07:29:39.675252: step: 312/77, loss: 0.07924443483352661 2023-01-22 07:29:40.917945: step: 316/77, loss: 0.019673509523272514 2023-01-22 07:29:42.187511: step: 320/77, loss: 0.07078725099563599 2023-01-22 07:29:43.442570: step: 324/77, loss: 0.014910933561623096 2023-01-22 07:29:44.736065: step: 328/77, loss: 0.02365684136748314 2023-01-22 07:29:45.993857: step: 332/77, loss: 0.032946422696113586 2023-01-22 07:29:47.317969: step: 336/77, loss: 0.023019496351480484 2023-01-22 07:29:48.634010: step: 340/77, loss: 0.05330086126923561 2023-01-22 07:29:49.951808: step: 344/77, loss: 0.08135435730218887 2023-01-22 07:29:51.297165: step: 348/77, loss: 0.08608993887901306 2023-01-22 07:29:52.601978: step: 352/77, loss: 0.028688719496130943 2023-01-22 07:29:53.888797: step: 356/77, loss: 0.006268288008868694 2023-01-22 07:29:55.210681: step: 360/77, loss: 0.03752407804131508 2023-01-22 07:29:56.447721: step: 364/77, loss: 0.012255517765879631 2023-01-22 07:29:57.731220: step: 368/77, loss: 0.18901900947093964 2023-01-22 07:29:59.027480: step: 372/77, loss: 0.017559271305799484 2023-01-22 07:30:00.304665: step: 376/77, loss: 0.017930801957845688 2023-01-22 07:30:01.587588: step: 380/77, loss: 0.0486404225230217 2023-01-22 07:30:02.920256: step: 384/77, loss: 0.021391339600086212 2023-01-22 07:30:04.245309: step: 388/77, loss: 0.010606233961880207 ================================================== Loss: 0.039 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 
'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 6 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:32:07.981745: step: 4/77, loss: 0.014090805314481258 2023-01-22 07:32:09.252200: step: 8/77, loss: 0.037653181701898575 2023-01-22 07:32:10.562065: step: 12/77, loss: 0.012905780225992203 2023-01-22 07:32:11.832518: step: 16/77, loss: 0.12145353853702545 2023-01-22 07:32:13.128756: step: 20/77, loss: 0.004029234871268272 2023-01-22 07:32:14.428653: step: 24/77, loss: 0.0458088181912899 2023-01-22 07:32:15.701896: step: 28/77, loss: 0.013678375631570816 2023-01-22 07:32:16.997473: step: 32/77, loss: 0.016728755086660385 2023-01-22 07:32:18.262397: step: 36/77, loss: 0.00028109041159041226 2023-01-22 07:32:19.569559: step: 40/77, loss: 0.010670517571270466 2023-01-22 07:32:20.829516: step: 44/77, loss: 0.05759980529546738 2023-01-22 07:32:22.081895: step: 48/77, loss: 0.030156835913658142 2023-01-22 07:32:23.323741: step: 52/77, loss: 0.013267605565488338 2023-01-22 07:32:24.619979: step: 56/77, loss: 0.056651029735803604 2023-01-22 
07:32:25.918622: step: 60/77, loss: 0.07792923599481583 2023-01-22 07:32:27.200511: step: 64/77, loss: 0.048105549067258835 2023-01-22 07:32:28.492746: step: 68/77, loss: 0.0016152573516592383 2023-01-22 07:32:29.817194: step: 72/77, loss: 0.01203523576259613 2023-01-22 07:32:31.073962: step: 76/77, loss: 0.00038057187339290977 2023-01-22 07:32:32.357816: step: 80/77, loss: 0.00727054663002491 2023-01-22 07:32:33.630776: step: 84/77, loss: 0.008634903468191624 2023-01-22 07:32:34.882829: step: 88/77, loss: 0.05545460432767868 2023-01-22 07:32:36.138351: step: 92/77, loss: 0.08746394515037537 2023-01-22 07:32:37.465630: step: 96/77, loss: 0.0012042783200740814 2023-01-22 07:32:38.754516: step: 100/77, loss: 0.0012032882077619433 2023-01-22 07:32:40.055067: step: 104/77, loss: 0.03924532234668732 2023-01-22 07:32:41.339768: step: 108/77, loss: 0.07420468330383301 2023-01-22 07:32:42.610696: step: 112/77, loss: 0.031061705201864243 2023-01-22 07:32:43.864085: step: 116/77, loss: 0.010193225927650928 2023-01-22 07:32:45.163970: step: 120/77, loss: 0.02527455799281597 2023-01-22 07:32:46.395624: step: 124/77, loss: 0.10996196419000626 2023-01-22 07:32:47.644253: step: 128/77, loss: 0.022913135588169098 2023-01-22 07:32:48.948580: step: 132/77, loss: 0.04355445131659508 2023-01-22 07:32:50.294614: step: 136/77, loss: 0.08999864012002945 2023-01-22 07:32:51.571570: step: 140/77, loss: 0.1155029907822609 2023-01-22 07:32:52.898542: step: 144/77, loss: 0.015417231246829033 2023-01-22 07:32:54.202958: step: 148/77, loss: 0.0026104964781552553 2023-01-22 07:32:55.497720: step: 152/77, loss: 0.015371415764093399 2023-01-22 07:32:56.802314: step: 156/77, loss: 0.028408939018845558 2023-01-22 07:32:58.133472: step: 160/77, loss: 0.013671678490936756 2023-01-22 07:32:59.394432: step: 164/77, loss: 0.0016819187439978123 2023-01-22 07:33:00.708353: step: 168/77, loss: 0.016836147755384445 2023-01-22 07:33:02.011191: step: 172/77, loss: 0.028055887669324875 2023-01-22 07:33:03.318904: step: 176/77, loss: 0.02009519934654236 2023-01-22 07:33:04.614906: step: 180/77, loss: 0.02400626242160797 2023-01-22 07:33:05.908073: step: 184/77, loss: 0.020713580772280693 2023-01-22 07:33:07.212181: step: 188/77, loss: 0.021156713366508484 2023-01-22 07:33:08.533602: step: 192/77, loss: 0.004837479908019304 2023-01-22 07:33:09.831117: step: 196/77, loss: 0.017320267856121063 2023-01-22 07:33:11.119135: step: 200/77, loss: 0.020907748490571976 2023-01-22 07:33:12.442452: step: 204/77, loss: 0.04583645239472389 2023-01-22 07:33:13.737609: step: 208/77, loss: 0.014641058631241322 2023-01-22 07:33:15.014845: step: 212/77, loss: 0.022127849981188774 2023-01-22 07:33:16.338229: step: 216/77, loss: 0.036513473838567734 2023-01-22 07:33:17.675503: step: 220/77, loss: 0.03856131061911583 2023-01-22 07:33:18.991837: step: 224/77, loss: 0.005931971129029989 2023-01-22 07:33:20.296300: step: 228/77, loss: 0.020555173978209496 2023-01-22 07:33:21.552602: step: 232/77, loss: 0.1392797976732254 2023-01-22 07:33:22.857281: step: 236/77, loss: 0.0072579397819936275 2023-01-22 07:33:24.122754: step: 240/77, loss: 0.016458049416542053 2023-01-22 07:33:25.443522: step: 244/77, loss: 0.017515050247311592 2023-01-22 07:33:26.708720: step: 248/77, loss: 0.015963345766067505 2023-01-22 07:33:28.004831: step: 252/77, loss: 0.04388893023133278 2023-01-22 07:33:29.269739: step: 256/77, loss: 0.011263003572821617 2023-01-22 07:33:30.518323: step: 260/77, loss: 0.051796067506074905 2023-01-22 07:33:31.839811: step: 264/77, loss: 0.010814322158694267 
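The 'New best ... model...' lines and the 'Current best result' recaps behave like per-language checkpoint bookkeeping keyed on the dev 'combined' score: epoch 2 set the first nonzero best, epochs 3 and 4 fell short of it, and epoch 5 overtook it for all three languages. A sketch consistent with that pattern (the strict '>' and the dev-combined criterion are inferred from the log, not confirmed against train.py):

best = {}  # language -> snapshot of the best epoch so far

def update_best(language, dev, test, sample):
    # Keep the snapshot with the strictly highest dev 'combined' score.
    # Strict '>' also fits the log: the all-zero epoch 1 printed no
    # "New best" message against the existing all-zero epoch-0 entry.
    prev = best.get(language)
    if prev is None or dev["combined"] > prev["dev"]["combined"]:
        best[language] = {"dev": dev, "test": test, "sample": sample}
        print(f"New best {language} model...")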
2023-01-22 07:33:33.053982: step: 268/77, loss: 0.014244206249713898 2023-01-22 07:33:34.285330: step: 272/77, loss: 0.01440565288066864 2023-01-22 07:33:35.553058: step: 276/77, loss: 0.017506180331110954 2023-01-22 07:33:36.858835: step: 280/77, loss: 0.02340465411543846 2023-01-22 07:33:38.154344: step: 284/77, loss: 0.054062407463788986 2023-01-22 07:33:39.434775: step: 288/77, loss: 0.01714850217103958 2023-01-22 07:33:40.739410: step: 292/77, loss: 0.02229364961385727 2023-01-22 07:33:42.058662: step: 296/77, loss: 0.014405068010091782 2023-01-22 07:33:43.359146: step: 300/77, loss: 0.06514809280633926 2023-01-22 07:33:44.660471: step: 304/77, loss: 0.029408197849988937 2023-01-22 07:33:45.979277: step: 308/77, loss: 0.010319838300347328 2023-01-22 07:33:47.224858: step: 312/77, loss: 0.010362434200942516 2023-01-22 07:33:48.512002: step: 316/77, loss: 0.002917802194133401 2023-01-22 07:33:49.848127: step: 320/77, loss: 0.018622087314724922 2023-01-22 07:33:51.150112: step: 324/77, loss: 0.024601642042398453 2023-01-22 07:33:52.411966: step: 328/77, loss: 0.0031394544057548046 2023-01-22 07:33:53.697040: step: 332/77, loss: 0.00596038531512022 2023-01-22 07:33:55.003910: step: 336/77, loss: 0.02685163915157318 2023-01-22 07:33:56.339599: step: 340/77, loss: 0.019619952887296677 2023-01-22 07:33:57.689168: step: 344/77, loss: 0.003369607264176011 2023-01-22 07:33:59.002302: step: 348/77, loss: 0.03238417208194733 2023-01-22 07:34:00.333673: step: 352/77, loss: 0.03758778050541878 2023-01-22 07:34:01.570874: step: 356/77, loss: 0.006597156636416912 2023-01-22 07:34:02.871489: step: 360/77, loss: 0.007597966585308313 2023-01-22 07:34:04.211102: step: 364/77, loss: 0.024710290133953094 2023-01-22 07:34:05.523048: step: 368/77, loss: 0.0800224095582962 2023-01-22 07:34:06.797999: step: 372/77, loss: 0.11172007769346237 2023-01-22 07:34:08.081970: step: 376/77, loss: 0.029980309307575226 2023-01-22 07:34:09.398194: step: 380/77, loss: 0.03759705647826195 2023-01-22 07:34:10.734802: step: 384/77, loss: 0.012880472466349602 2023-01-22 07:34:12.036696: step: 388/77, loss: 0.006357981823384762 ================================================== Loss: 0.029 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 6} Test Chinese: {'template': {'p': 0.9615384615384616, 'r': 0.6097560975609756, 'f1': 0.7462686567164178}, 'slot': {'p': 0.46875, 'r': 0.013636363636363636, 'f1': 0.026501766784452298}, 'combined': 0.019777437898844997, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 6} Test Korean: {'template': {'p': 0.9615384615384616, 'r': 0.6097560975609756, 'f1': 0.7462686567164178}, 'slot': {'p': 0.46875, 'r': 0.013636363636363636, 'f1': 0.026501766784452298}, 'combined': 0.019777437898844997, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 6} Test Russian: {'template': {'p': 0.974025974025974, 'r': 0.6097560975609756, 'f1': 0.7500000000000001}, 'slot': {'p': 0.5, 'r': 0.013636363636363636, 'f1': 0.026548672566371678}, 'combined': 0.01991150442477876, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 
'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 6} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 7 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:35:49.010608: step: 4/77, loss: 0.00365395937114954 2023-01-22 07:35:50.279429: step: 8/77, loss: 0.006277657113969326 2023-01-22 07:35:51.553027: step: 12/77, loss: 0.020647669211030006 2023-01-22 07:35:52.832220: step: 16/77, loss: 0.019607748836278915 2023-01-22 07:35:54.066006: step: 20/77, loss: 0.022213328629732132 2023-01-22 07:35:55.340307: step: 24/77, loss: 0.011937039904296398 2023-01-22 07:35:56.602442: step: 28/77, loss: 0.02456793561577797 2023-01-22 07:35:57.894455: step: 32/77, loss: 0.04214583709836006 2023-01-22 07:35:59.158847: step: 36/77, loss: 0.05198121815919876 2023-01-22 07:36:00.435054: step: 40/77, loss: 0.012112004682421684 2023-01-22 07:36:01.712877: step: 44/77, loss: 0.0383678674697876 2023-01-22 07:36:03.023146: step: 48/77, loss: 0.020618222653865814 2023-01-22 
07:36:04.311273: step: 52/77, loss: 0.00845943484455347 2023-01-22 07:36:05.560837: step: 56/77, loss: 0.001100810943171382 2023-01-22 07:36:06.818657: step: 60/77, loss: 0.030928371474146843 2023-01-22 07:36:08.098934: step: 64/77, loss: 0.01059103012084961 2023-01-22 07:36:09.428203: step: 68/77, loss: 0.015934430062770844 2023-01-22 07:36:10.684843: step: 72/77, loss: 0.002905278466641903 2023-01-22 07:36:11.966223: step: 76/77, loss: 0.0014717906014993787 2023-01-22 07:36:13.261153: step: 80/77, loss: 0.014231668785214424 2023-01-22 07:36:14.570499: step: 84/77, loss: 0.01598450541496277 2023-01-22 07:36:15.893001: step: 88/77, loss: 6.96962742949836e-05 2023-01-22 07:36:17.235906: step: 92/77, loss: 0.0290484931319952 2023-01-22 07:36:18.497332: step: 96/77, loss: 0.027346597984433174 2023-01-22 07:36:19.817243: step: 100/77, loss: 0.01010982133448124 2023-01-22 07:36:21.064499: step: 104/77, loss: 0.028047332540154457 2023-01-22 07:36:22.402097: step: 108/77, loss: 0.003457102458924055 2023-01-22 07:36:23.755625: step: 112/77, loss: 0.03708508610725403 2023-01-22 07:36:25.051752: step: 116/77, loss: 0.016202857717871666 2023-01-22 07:36:26.388123: step: 120/77, loss: 0.04042280465364456 2023-01-22 07:36:27.671812: step: 124/77, loss: 0.040032271295785904 2023-01-22 07:36:29.014285: step: 128/77, loss: 0.05625339224934578 2023-01-22 07:36:30.337327: step: 132/77, loss: 0.034802548587322235 2023-01-22 07:36:31.616087: step: 136/77, loss: 0.0036582592874765396 2023-01-22 07:36:32.939396: step: 140/77, loss: 0.00691894581541419 2023-01-22 07:36:34.186647: step: 144/77, loss: 0.013843704015016556 2023-01-22 07:36:35.491272: step: 148/77, loss: 0.005941564217209816 2023-01-22 07:36:36.804938: step: 152/77, loss: 0.06688333302736282 2023-01-22 07:36:38.082842: step: 156/77, loss: 0.01571185328066349 2023-01-22 07:36:39.365809: step: 160/77, loss: 0.0064134192653000355 2023-01-22 07:36:40.690160: step: 164/77, loss: 0.0006494708359241486 2023-01-22 07:36:41.966052: step: 168/77, loss: 0.00016186948050744832 2023-01-22 07:36:43.262153: step: 172/77, loss: 0.03260376676917076 2023-01-22 07:36:44.552632: step: 176/77, loss: 0.016576889902353287 2023-01-22 07:36:45.878618: step: 180/77, loss: 0.08860062062740326 2023-01-22 07:36:47.129017: step: 184/77, loss: 0.004775453824549913 2023-01-22 07:36:48.417204: step: 188/77, loss: 0.10617873817682266 2023-01-22 07:36:49.711127: step: 192/77, loss: 0.014482242986559868 2023-01-22 07:36:51.017728: step: 196/77, loss: 0.037338465452194214 2023-01-22 07:36:52.316784: step: 200/77, loss: 0.011880840174853802 2023-01-22 07:36:53.617654: step: 204/77, loss: 0.02243220992386341 2023-01-22 07:36:54.919375: step: 208/77, loss: 0.006865553557872772 2023-01-22 07:36:56.268950: step: 212/77, loss: 0.023941833525896072 2023-01-22 07:36:57.590132: step: 216/77, loss: 0.048299212008714676 2023-01-22 07:36:58.913540: step: 220/77, loss: 0.04769997298717499 2023-01-22 07:37:00.184105: step: 224/77, loss: 0.0007127886055968702 2023-01-22 07:37:01.493368: step: 228/77, loss: 0.0004685519670601934 2023-01-22 07:37:02.828187: step: 232/77, loss: 0.020813144743442535 2023-01-22 07:37:04.154604: step: 236/77, loss: 0.005600340198725462 2023-01-22 07:37:05.494864: step: 240/77, loss: 0.10227024555206299 2023-01-22 07:37:06.865889: step: 244/77, loss: 0.015351877547800541 2023-01-22 07:37:08.202644: step: 248/77, loss: 0.0005788762355223298 2023-01-22 07:37:09.485223: step: 252/77, loss: 0.003927960526198149 2023-01-22 07:37:10.809264: step: 256/77, loss: 0.02673269994556904 
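The 'Loss:' line closing each epoch has fallen steadily, from 0.106 in epoch 1 through 0.065, 0.047, 0.038, and 0.039 to 0.029 by epoch 6, and it is consistent with a simple average of the per-step losses printed above it, though the exact reduction is not visible in the log. A sketch under that assumption:

def epoch_summary(step_losses):
    # Assumed reduction: arithmetic mean of the logged step losses,
    # printed to three decimals like the "Loss: 0.029" lines.
    mean = sum(step_losses) / len(step_losses)
    print(f"Loss: {mean:.3f}")
    return mean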
2023-01-22 07:37:12.090438: step: 260/77, loss: 0.039880044758319855 2023-01-22 07:37:13.402468: step: 264/77, loss: 0.03469827026128769 2023-01-22 07:37:14.669701: step: 268/77, loss: 0.011209025979042053 2023-01-22 07:37:15.951740: step: 272/77, loss: 0.013481708243489265 2023-01-22 07:37:17.273324: step: 276/77, loss: 0.02489202469587326 2023-01-22 07:37:18.642960: step: 280/77, loss: 0.018024705350399017 2023-01-22 07:37:19.968215: step: 284/77, loss: 0.006658963393419981 2023-01-22 07:37:21.301948: step: 288/77, loss: 0.032713882625103 2023-01-22 07:37:22.600364: step: 292/77, loss: 0.013792910613119602 2023-01-22 07:37:23.920649: step: 296/77, loss: 0.02276519313454628 2023-01-22 07:37:25.203125: step: 300/77, loss: 0.013171669095754623 2023-01-22 07:37:26.485396: step: 304/77, loss: 0.0017072930932044983 2023-01-22 07:37:27.755714: step: 308/77, loss: 0.00681102741509676 2023-01-22 07:37:29.013853: step: 312/77, loss: 0.019400490447878838 2023-01-22 07:37:30.352331: step: 316/77, loss: 0.029169730842113495 2023-01-22 07:37:31.707058: step: 320/77, loss: 0.0014584583695977926 2023-01-22 07:37:33.031862: step: 324/77, loss: 0.04748348891735077 2023-01-22 07:37:34.370612: step: 328/77, loss: 0.013542639091610909 2023-01-22 07:37:35.624900: step: 332/77, loss: 0.03388283774256706 2023-01-22 07:37:36.989545: step: 336/77, loss: 0.00780377397313714 2023-01-22 07:37:38.289892: step: 340/77, loss: 0.039747536182403564 2023-01-22 07:37:39.626951: step: 344/77, loss: 0.014999479055404663 2023-01-22 07:37:40.925082: step: 348/77, loss: 0.023499522358179092 2023-01-22 07:37:42.242606: step: 352/77, loss: 0.2161271721124649 2023-01-22 07:37:43.585020: step: 356/77, loss: 0.018633641302585602 2023-01-22 07:37:44.888189: step: 360/77, loss: 0.017180128023028374 2023-01-22 07:37:46.222970: step: 364/77, loss: 0.14562822878360748 2023-01-22 07:37:47.537419: step: 368/77, loss: 0.04934248328208923 2023-01-22 07:37:48.835965: step: 372/77, loss: 0.004744932055473328 2023-01-22 07:37:50.132802: step: 376/77, loss: 0.03900888189673424 2023-01-22 07:37:51.464926: step: 380/77, loss: 0.014300035312771797 2023-01-22 07:37:52.777024: step: 384/77, loss: 0.02475110813975334 2023-01-22 07:37:54.111357: step: 388/77, loss: 0.0035867562983185053 ================================================== Loss: 0.025 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Chinese: {'template': {'p': 0.9390243902439024, 'r': 0.6260162601626016, 'f1': 0.751219512195122}, 'slot': {'p': 0.4594594594594595, 'r': 0.015454545454545455, 'f1': 0.029903254177660512}, 'combined': 0.022463908016388874, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Korean: {'template': {'p': 0.9390243902439024, 'r': 0.6260162601626016, 'f1': 0.751219512195122}, 'slot': {'p': 0.4594594594594595, 'r': 0.015454545454545455, 'f1': 0.029903254177660512}, 'combined': 0.022463908016388874, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Russian: {'template': {'p': 0.9390243902439024, 'r': 0.6260162601626016, 'f1': 0.751219512195122}, 
'slot': {'p': 0.4594594594594595, 'r': 0.015454545454545455, 'f1': 0.029903254177660512}, 'combined': 0.022463908016388874, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 8 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:39:32.572099: step: 4/77, loss: 0.010056269355118275 2023-01-22 07:39:33.834538: step: 8/77, loss: 0.018890120089054108 2023-01-22 07:39:35.143073: step: 12/77, loss: 0.017161967232823372 2023-01-22 07:39:36.394790: step: 16/77, loss: 0.02055450901389122 2023-01-22 07:39:37.614189: step: 20/77, loss: 0.024720754474401474 2023-01-22 07:39:38.874501: step: 24/77, loss: 0.010491413995623589 2023-01-22 07:39:40.160996: step: 28/77, loss: 0.0037658684886991978 2023-01-22 07:39:41.477052: step: 32/77, loss: 0.038764867931604385 2023-01-22 07:39:42.793605: step: 36/77, loss: 0.03026004135608673 2023-01-22 07:39:44.036576: step: 
40/77, loss: 0.02923579327762127 2023-01-22 07:39:45.380425: step: 44/77, loss: 0.04140114411711693 2023-01-22 07:39:46.633322: step: 48/77, loss: 0.033918458968400955 2023-01-22 07:39:47.923690: step: 52/77, loss: 0.00033601350151002407 2023-01-22 07:39:49.229678: step: 56/77, loss: 0.0021884054876863956 2023-01-22 07:39:50.529597: step: 60/77, loss: 0.005278467666357756 2023-01-22 07:39:51.818210: step: 64/77, loss: 0.07367675006389618 2023-01-22 07:39:53.097805: step: 68/77, loss: 0.05771408602595329 2023-01-22 07:39:54.398775: step: 72/77, loss: 0.08574645966291428 2023-01-22 07:39:55.711179: step: 76/77, loss: 0.013951526023447514 2023-01-22 07:39:56.979021: step: 80/77, loss: 0.016018124297261238 2023-01-22 07:39:58.268877: step: 84/77, loss: 0.00187723059207201 2023-01-22 07:39:59.580844: step: 88/77, loss: 0.002696490380913019 2023-01-22 07:40:00.925248: step: 92/77, loss: 0.013174856081604958 2023-01-22 07:40:02.281240: step: 96/77, loss: 0.005178231745958328 2023-01-22 07:40:03.632501: step: 100/77, loss: 0.019260739907622337 2023-01-22 07:40:04.981579: step: 104/77, loss: 0.05311047285795212 2023-01-22 07:40:06.251168: step: 108/77, loss: 0.01540825143456459 2023-01-22 07:40:07.549203: step: 112/77, loss: 0.006137767806649208 2023-01-22 07:40:08.821219: step: 116/77, loss: 0.01354296412318945 2023-01-22 07:40:10.195561: step: 120/77, loss: 0.016296926885843277 2023-01-22 07:40:11.510778: step: 124/77, loss: 0.0025672190822660923 2023-01-22 07:40:12.811429: step: 128/77, loss: 0.0595506876707077 2023-01-22 07:40:14.119958: step: 132/77, loss: 0.01555662415921688 2023-01-22 07:40:15.413828: step: 136/77, loss: 0.0031445412896573544 2023-01-22 07:40:16.728574: step: 140/77, loss: 0.0343557633459568 2023-01-22 07:40:18.006646: step: 144/77, loss: 0.00534120062366128 2023-01-22 07:40:19.292706: step: 148/77, loss: 0.015882568433880806 2023-01-22 07:40:20.535294: step: 152/77, loss: 0.0034859557636082172 2023-01-22 07:40:21.842243: step: 156/77, loss: 0.0897187888622284 2023-01-22 07:40:23.111300: step: 160/77, loss: 0.03130680322647095 2023-01-22 07:40:24.440251: step: 164/77, loss: 0.005163657478988171 2023-01-22 07:40:25.784146: step: 168/77, loss: 0.014301144517958164 2023-01-22 07:40:27.119128: step: 172/77, loss: 0.002605058718472719 2023-01-22 07:40:28.411281: step: 176/77, loss: 0.004895820282399654 2023-01-22 07:40:29.695836: step: 180/77, loss: 0.0005318675539456308 2023-01-22 07:40:30.991757: step: 184/77, loss: 0.02352241799235344 2023-01-22 07:40:32.289044: step: 188/77, loss: 0.009760452434420586 2023-01-22 07:40:33.632063: step: 192/77, loss: 0.0005129415076225996 2023-01-22 07:40:34.931822: step: 196/77, loss: 0.04506408050656319 2023-01-22 07:40:36.224613: step: 200/77, loss: 0.009796293452382088 2023-01-22 07:40:37.585112: step: 204/77, loss: 0.00035782958730123937 2023-01-22 07:40:38.922610: step: 208/77, loss: 0.031156811863183975 2023-01-22 07:40:40.279799: step: 212/77, loss: 0.038962021470069885 2023-01-22 07:40:41.571408: step: 216/77, loss: 0.003285457845777273 2023-01-22 07:40:42.900138: step: 220/77, loss: 0.030111942440271378 2023-01-22 07:40:44.225620: step: 224/77, loss: 0.0034210169687867165 2023-01-22 07:40:45.458818: step: 228/77, loss: 0.003368095261976123 2023-01-22 07:40:46.782476: step: 232/77, loss: 0.028834929689764977 2023-01-22 07:40:48.118929: step: 236/77, loss: 0.005496030207723379 2023-01-22 07:40:49.399075: step: 240/77, loss: 0.0867997258901596 2023-01-22 07:40:50.705261: step: 244/77, loss: 0.013414930552244186 2023-01-22 
07:40:52.022592: step: 248/77, loss: 0.11154910922050476 2023-01-22 07:40:53.361207: step: 252/77, loss: 0.01679971069097519 2023-01-22 07:40:54.697450: step: 256/77, loss: 0.016359906643629074 2023-01-22 07:40:56.011161: step: 260/77, loss: 0.03159204125404358 2023-01-22 07:40:57.333555: step: 264/77, loss: 0.006481696851551533 2023-01-22 07:40:58.671718: step: 268/77, loss: 0.0011871152091771364 2023-01-22 07:40:59.978960: step: 272/77, loss: 0.02756771445274353 2023-01-22 07:41:01.220980: step: 276/77, loss: 0.006530491169542074 2023-01-22 07:41:02.521557: step: 280/77, loss: 0.04043383151292801 2023-01-22 07:41:03.839249: step: 284/77, loss: 0.03230508789420128 2023-01-22 07:41:05.187859: step: 288/77, loss: 0.014443744905292988 2023-01-22 07:41:06.452432: step: 292/77, loss: 0.009209951385855675 2023-01-22 07:41:07.716070: step: 296/77, loss: 0.019323663786053658 2023-01-22 07:41:09.067701: step: 300/77, loss: 0.09706917405128479 2023-01-22 07:41:10.348400: step: 304/77, loss: 0.017132218927145004 2023-01-22 07:41:11.662112: step: 308/77, loss: 0.008376318961381912 2023-01-22 07:41:12.959375: step: 312/77, loss: 0.018071360886096954 2023-01-22 07:41:14.280106: step: 316/77, loss: 0.021176446229219437 2023-01-22 07:41:15.567294: step: 320/77, loss: 0.008609825745224953 2023-01-22 07:41:16.890331: step: 324/77, loss: 0.015605449676513672 2023-01-22 07:41:18.183871: step: 328/77, loss: 0.027690229937434196 2023-01-22 07:41:19.488600: step: 332/77, loss: 0.10342463850975037 2023-01-22 07:41:20.782446: step: 336/77, loss: 0.07532121986150742 2023-01-22 07:41:22.109393: step: 340/77, loss: 0.0045474437065422535 2023-01-22 07:41:23.415536: step: 344/77, loss: 0.014486493542790413 2023-01-22 07:41:24.714523: step: 348/77, loss: 0.015550438314676285 2023-01-22 07:41:26.036348: step: 352/77, loss: 0.0368066281080246 2023-01-22 07:41:27.320233: step: 356/77, loss: 0.007086616940796375 2023-01-22 07:41:28.623680: step: 360/77, loss: 0.017183110117912292 2023-01-22 07:41:29.960048: step: 364/77, loss: 0.011694397777318954 2023-01-22 07:41:31.237513: step: 368/77, loss: 0.011689573526382446 2023-01-22 07:41:32.509865: step: 372/77, loss: 0.08766619861125946 2023-01-22 07:41:33.859223: step: 376/77, loss: 0.0017116167582571507 2023-01-22 07:41:35.133883: step: 380/77, loss: 0.0179583802819252 2023-01-22 07:41:36.414912: step: 384/77, loss: 0.0024103238247334957 2023-01-22 07:41:37.755596: step: 388/77, loss: 0.004919702652841806 ================================================== Loss: 0.023 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Chinese: {'template': {'p': 0.9746835443037974, 'r': 0.6260162601626016, 'f1': 0.7623762376237624}, 'slot': {'p': 0.4146341463414634, 'r': 0.015454545454545455, 'f1': 0.02979842243645925}, 'combined': 0.02271760918423131, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Korean: {'template': {'p': 0.9746835443037974, 'r': 0.6260162601626016, 'f1': 0.7623762376237624}, 'slot': {'p': 0.4146341463414634, 'r': 0.015454545454545455, 'f1': 0.02979842243645925}, 'combined': 0.02271760918423131, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Russian: {'template': {'p': 0.9746835443037974, 'r': 0.6260162601626016, 'f1': 0.7623762376237624}, 'slot': {'p': 0.4146341463414634, 'r': 0.015454545454545455, 'f1': 0.02979842243645925}, 'combined': 0.02271760918423131, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 9 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:43:16.303224: step: 4/77, loss: 0.009951209649443626 2023-01-22 07:43:17.654690: step: 8/77, loss: 0.01025527622550726 2023-01-22 07:43:18.932759: step: 12/77, loss: 0.010599697008728981 2023-01-22 07:43:20.215956: step: 16/77, loss: 2.8813841709052213e-05 2023-01-22 07:43:21.529569: step: 20/77, loss: 0.015449351631104946 2023-01-22 07:43:22.810644: step: 24/77, loss: 0.013126425445079803 2023-01-22 07:43:24.117672: step: 28/77, 
loss: 0.024598799645900726 2023-01-22 07:43:25.394495: step: 32/77, loss: 0.015080241486430168 2023-01-22 07:43:26.669282: step: 36/77, loss: 0.04678387939929962 2023-01-22 07:43:27.953064: step: 40/77, loss: 0.012904101982712746 2023-01-22 07:43:29.260919: step: 44/77, loss: 0.015326356515288353 2023-01-22 07:43:30.548624: step: 48/77, loss: 0.021616067737340927 2023-01-22 07:43:31.835951: step: 52/77, loss: 0.016074180603027344 2023-01-22 07:43:33.083702: step: 56/77, loss: 0.013141414150595665 2023-01-22 07:43:34.413347: step: 60/77, loss: 0.05112868547439575 2023-01-22 07:43:35.693371: step: 64/77, loss: 0.0026340484619140625 2023-01-22 07:43:36.965059: step: 68/77, loss: 0.002100118901580572 2023-01-22 07:43:38.261352: step: 72/77, loss: 0.17738491296768188 2023-01-22 07:43:39.558567: step: 76/77, loss: 0.037238575518131256 2023-01-22 07:43:40.831832: step: 80/77, loss: 0.01159091666340828 2023-01-22 07:43:42.132476: step: 84/77, loss: 8.563547453377396e-05 2023-01-22 07:43:43.444509: step: 88/77, loss: 0.0001513077295385301 2023-01-22 07:43:44.744211: step: 92/77, loss: 0.02696680650115013 2023-01-22 07:43:46.013391: step: 96/77, loss: 0.0179781224578619 2023-01-22 07:43:47.294198: step: 100/77, loss: 0.06407640874385834 2023-01-22 07:43:48.632164: step: 104/77, loss: 0.0012260295916348696 2023-01-22 07:43:49.926978: step: 108/77, loss: 0.007572871632874012 2023-01-22 07:43:51.188489: step: 112/77, loss: 0.005561821162700653 2023-01-22 07:43:52.517882: step: 116/77, loss: 0.010646567679941654 2023-01-22 07:43:53.865324: step: 120/77, loss: 0.012919003143906593 2023-01-22 07:43:55.118072: step: 124/77, loss: 0.012604203075170517 2023-01-22 07:43:56.398499: step: 128/77, loss: 0.05541830509901047 2023-01-22 07:43:57.704249: step: 132/77, loss: 0.03553691506385803 2023-01-22 07:43:58.997658: step: 136/77, loss: 0.01568058505654335 2023-01-22 07:44:00.343145: step: 140/77, loss: 0.03287587687373161 2023-01-22 07:44:01.617893: step: 144/77, loss: 0.0027599541936069727 2023-01-22 07:44:02.963452: step: 148/77, loss: 0.01692170463502407 2023-01-22 07:44:04.233807: step: 152/77, loss: 0.08928981423377991 2023-01-22 07:44:05.570079: step: 156/77, loss: 0.04975442215800285 2023-01-22 07:44:06.880405: step: 160/77, loss: 0.02008040063083172 2023-01-22 07:44:08.168179: step: 164/77, loss: 0.0019231241894885898 2023-01-22 07:44:09.491154: step: 168/77, loss: 0.0244793388992548 2023-01-22 07:44:10.765656: step: 172/77, loss: 0.010686839930713177 2023-01-22 07:44:12.044849: step: 176/77, loss: 0.0025486641097813845 2023-01-22 07:44:13.339011: step: 180/77, loss: 0.0068518416956067085 2023-01-22 07:44:14.612731: step: 184/77, loss: 0.014136513695120811 2023-01-22 07:44:15.928586: step: 188/77, loss: 0.03422325477004051 2023-01-22 07:44:17.250235: step: 192/77, loss: 0.0019678741227835417 2023-01-22 07:44:18.567975: step: 196/77, loss: 0.003199803875759244 2023-01-22 07:44:19.818719: step: 200/77, loss: 0.00686542596668005 2023-01-22 07:44:21.124189: step: 204/77, loss: 0.00038556900108233094 2023-01-22 07:44:22.443790: step: 208/77, loss: 0.00295876432210207 2023-01-22 07:44:23.726256: step: 212/77, loss: 8.276679000118747e-05 2023-01-22 07:44:25.030120: step: 216/77, loss: 0.009483201429247856 2023-01-22 07:44:26.306107: step: 220/77, loss: 0.009370522573590279 2023-01-22 07:44:27.582433: step: 224/77, loss: 0.025101710110902786 2023-01-22 07:44:28.921150: step: 228/77, loss: 0.020489763468503952 2023-01-22 07:44:30.213900: step: 232/77, loss: 0.0010126195847988129 2023-01-22 07:44:31.533986: step: 
236/77, loss: 0.02698575146496296 2023-01-22 07:44:32.892454: step: 240/77, loss: 0.04893007129430771 2023-01-22 07:44:34.197624: step: 244/77, loss: 0.012933338060975075 2023-01-22 07:44:35.480063: step: 248/77, loss: 0.016066405922174454 2023-01-22 07:44:36.756638: step: 252/77, loss: 0.00607153307646513 2023-01-22 07:44:38.128298: step: 256/77, loss: 0.0004491469881031662 2023-01-22 07:44:39.487939: step: 260/77, loss: 0.14309868216514587 2023-01-22 07:44:40.770998: step: 264/77, loss: 0.01501578837633133 2023-01-22 07:44:42.100566: step: 268/77, loss: 0.006420220248401165 2023-01-22 07:44:43.438264: step: 272/77, loss: 0.028947584331035614 2023-01-22 07:44:44.732785: step: 276/77, loss: 0.03736587241292 2023-01-22 07:44:46.052615: step: 280/77, loss: 0.0055717648938298225 2023-01-22 07:44:47.326951: step: 284/77, loss: 0.017412912100553513 2023-01-22 07:44:48.617611: step: 288/77, loss: 0.01638699881732464 2023-01-22 07:44:49.912089: step: 292/77, loss: 0.003690283978357911 2023-01-22 07:44:51.191527: step: 296/77, loss: 0.0013558377977460623 2023-01-22 07:44:52.492087: step: 300/77, loss: 0.018798088654875755 2023-01-22 07:44:53.801437: step: 304/77, loss: 0.004140047822147608 2023-01-22 07:44:55.079616: step: 308/77, loss: 0.007700146175920963 2023-01-22 07:44:56.348528: step: 312/77, loss: 0.017628833651542664 2023-01-22 07:44:57.621034: step: 316/77, loss: 0.016042888164520264 2023-01-22 07:44:58.918815: step: 320/77, loss: 0.01528505515307188 2023-01-22 07:45:00.250988: step: 324/77, loss: 0.02525682933628559 2023-01-22 07:45:01.599281: step: 328/77, loss: 0.02757682465016842 2023-01-22 07:45:02.896860: step: 332/77, loss: 0.10388769954442978 2023-01-22 07:45:04.272544: step: 336/77, loss: 0.012050546705722809 2023-01-22 07:45:05.611291: step: 340/77, loss: 0.07141268998384476 2023-01-22 07:45:06.901187: step: 344/77, loss: 0.01598818600177765 2023-01-22 07:45:08.221380: step: 348/77, loss: 0.017102017998695374 2023-01-22 07:45:09.491966: step: 352/77, loss: 0.0203529205173254 2023-01-22 07:45:10.724683: step: 356/77, loss: 0.05472177267074585 2023-01-22 07:45:12.024815: step: 360/77, loss: 0.10159629583358765 2023-01-22 07:45:13.332539: step: 364/77, loss: 0.011427942663431168 2023-01-22 07:45:14.595119: step: 368/77, loss: 0.047470469027757645 2023-01-22 07:45:15.940439: step: 372/77, loss: 0.00397944450378418 2023-01-22 07:45:17.242845: step: 376/77, loss: 0.030490266159176826 2023-01-22 07:45:18.559247: step: 380/77, loss: 0.02138015814125538 2023-01-22 07:45:19.876519: step: 384/77, loss: 0.049235884100198746 2023-01-22 07:45:21.187878: step: 388/77, loss: 0.013251261785626411 ================================================== Loss: 0.023 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Chinese: {'template': {'p': 0.9634146341463414, 'r': 0.6422764227642277, 'f1': 0.7707317073170733}, 'slot': {'p': 0.5588235294117647, 'r': 0.017272727272727273, 'f1': 0.03350970017636684}, 'combined': 0.025826988428614448, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 9} Test Korean: {'template': {'p': 0.9753086419753086, 'r': 0.6422764227642277, 'f1': 0.7745098039215687}, 'slot': {'p': 0.5757575757575758, 'r': 0.017272727272727273, 'f1': 
0.033539276257722864}, 'combined': 0.025976498278040258, 'epoch': 9} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Russian: {'template': {'p': 0.9753086419753086, 'r': 0.6422764227642277, 'f1': 0.7745098039215687}, 'slot': {'p': 0.5757575757575758, 'r': 0.017272727272727273, 'f1': 0.033539276257722864}, 'combined': 0.025976498278040258, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 10 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:46:59.412125: step: 4/77, loss: 0.03768238425254822 2023-01-22 07:47:00.738585: step: 8/77, loss: 0.028346769511699677 2023-01-22 07:47:02.025951: step: 12/77, loss: 0.00744956498965621 2023-01-22 07:47:03.338381: step: 16/77, loss: 0.048924703150987625 2023-01-22 07:47:04.669342: step: 20/77, loss: 
0.03151417896151543 2023-01-22 07:47:05.966676: step: 24/77, loss: 0.002606735099107027 2023-01-22 07:47:07.226255: step: 28/77, loss: 0.0024922217708081007 2023-01-22 07:47:08.580659: step: 32/77, loss: 0.048660993576049805 2023-01-22 07:47:09.905471: step: 36/77, loss: 0.004283279180526733 2023-01-22 07:47:11.193617: step: 40/77, loss: 0.04208429902791977 2023-01-22 07:47:12.508976: step: 44/77, loss: 0.01696396991610527 2023-01-22 07:47:13.786033: step: 48/77, loss: 0.010859989561140537 2023-01-22 07:47:15.060508: step: 52/77, loss: 0.006402358412742615 2023-01-22 07:47:16.323453: step: 56/77, loss: 0.0019209606107324362 2023-01-22 07:47:17.631640: step: 60/77, loss: 0.0024111694656312466 2023-01-22 07:47:18.951384: step: 64/77, loss: 0.00038403054350055754 2023-01-22 07:47:20.316908: step: 68/77, loss: 0.0019636116921901703 2023-01-22 07:47:21.601026: step: 72/77, loss: 0.015139022842049599 2023-01-22 07:47:22.918204: step: 76/77, loss: 0.018672920763492584 2023-01-22 07:47:24.245622: step: 80/77, loss: 0.0020270387176424265 2023-01-22 07:47:25.483091: step: 84/77, loss: 0.009439261630177498 2023-01-22 07:47:26.776656: step: 88/77, loss: 0.008923723362386227 2023-01-22 07:47:28.012951: step: 92/77, loss: 0.00011983791046077386 2023-01-22 07:47:29.301175: step: 96/77, loss: 0.006050101015716791 2023-01-22 07:47:30.633957: step: 100/77, loss: 0.0179508775472641 2023-01-22 07:47:31.902104: step: 104/77, loss: 0.017849788069725037 2023-01-22 07:47:33.170051: step: 108/77, loss: 0.017486661672592163 2023-01-22 07:47:34.472447: step: 112/77, loss: 0.007766797207295895 2023-01-22 07:47:35.746853: step: 116/77, loss: 0.019981278106570244 2023-01-22 07:47:37.054335: step: 120/77, loss: 0.002379945944994688 2023-01-22 07:47:38.282615: step: 124/77, loss: 0.011582271195948124 2023-01-22 07:47:39.567752: step: 128/77, loss: 0.040890343487262726 2023-01-22 07:47:40.827904: step: 132/77, loss: 0.019269846379756927 2023-01-22 07:47:42.136989: step: 136/77, loss: 0.03777790069580078 2023-01-22 07:47:43.418164: step: 140/77, loss: 0.00588800897821784 2023-01-22 07:47:44.703966: step: 144/77, loss: 0.04820588603615761 2023-01-22 07:47:45.999233: step: 148/77, loss: 0.009185411036014557 2023-01-22 07:47:47.300843: step: 152/77, loss: 0.002547596348449588 2023-01-22 07:47:48.594623: step: 156/77, loss: 0.030245959758758545 2023-01-22 07:47:49.932170: step: 160/77, loss: 0.0031875655986368656 2023-01-22 07:47:51.182307: step: 164/77, loss: 0.053960613906383514 2023-01-22 07:47:52.483721: step: 168/77, loss: 0.009313058108091354 2023-01-22 07:47:53.758495: step: 172/77, loss: 0.03616435080766678 2023-01-22 07:47:55.097454: step: 176/77, loss: 0.006441659759730101 2023-01-22 07:47:56.474770: step: 180/77, loss: 0.012755388393998146 2023-01-22 07:47:57.744182: step: 184/77, loss: 0.01913641393184662 2023-01-22 07:47:59.072172: step: 188/77, loss: 0.011171862483024597 2023-01-22 07:48:00.339415: step: 192/77, loss: 0.05577773600816727 2023-01-22 07:48:01.624310: step: 196/77, loss: 0.014128005132079124 2023-01-22 07:48:02.886410: step: 200/77, loss: 0.0018707435810938478 2023-01-22 07:48:04.155413: step: 204/77, loss: 0.0009870977373793721 2023-01-22 07:48:05.398990: step: 208/77, loss: 0.015966808423399925 2023-01-22 07:48:06.708948: step: 212/77, loss: 0.003523885505273938 2023-01-22 07:48:07.987665: step: 216/77, loss: 0.0007100008078850806 2023-01-22 07:48:09.344595: step: 220/77, loss: 0.009846840053796768 2023-01-22 07:48:10.622712: step: 224/77, loss: 0.003518306650221348 2023-01-22 07:48:11.938654: 
step: 228/77, loss: 0.013546239584684372 2023-01-22 07:48:13.195765: step: 232/77, loss: 0.008653189055621624 2023-01-22 07:48:14.525511: step: 236/77, loss: 0.0033769761212170124 2023-01-22 07:48:15.821409: step: 240/77, loss: 0.009792322292923927 2023-01-22 07:48:17.094842: step: 244/77, loss: 0.07129838317632675 2023-01-22 07:48:18.395713: step: 248/77, loss: 0.5841067433357239 2023-01-22 07:48:19.672195: step: 252/77, loss: 0.0452253594994545 2023-01-22 07:48:20.954710: step: 256/77, loss: 0.014134548604488373 2023-01-22 07:48:22.273135: step: 260/77, loss: 0.007768142968416214 2023-01-22 07:48:23.530154: step: 264/77, loss: 0.006193614564836025 2023-01-22 07:48:24.880676: step: 268/77, loss: 0.035763513296842575 2023-01-22 07:48:26.155855: step: 272/77, loss: 0.0006083827465772629 2023-01-22 07:48:27.460508: step: 276/77, loss: 0.07475440204143524 2023-01-22 07:48:28.750471: step: 280/77, loss: 0.0016128707211464643 2023-01-22 07:48:30.056842: step: 284/77, loss: 0.0022124836686998606 2023-01-22 07:48:31.342888: step: 288/77, loss: 0.03743232414126396 2023-01-22 07:48:32.691001: step: 292/77, loss: 0.008901816792786121 2023-01-22 07:48:33.940718: step: 296/77, loss: 0.0004599709063768387 2023-01-22 07:48:35.237760: step: 300/77, loss: 0.019326256588101387 2023-01-22 07:48:36.539292: step: 304/77, loss: 0.05005129799246788 2023-01-22 07:48:37.824739: step: 308/77, loss: 0.021847281605005264 2023-01-22 07:48:39.081936: step: 312/77, loss: 0.01302673015743494 2023-01-22 07:48:40.335368: step: 316/77, loss: 0.03336362540721893 2023-01-22 07:48:41.629607: step: 320/77, loss: 0.035148534923791885 2023-01-22 07:48:42.913209: step: 324/77, loss: 0.02668309211730957 2023-01-22 07:48:44.173137: step: 328/77, loss: 0.010160962119698524 2023-01-22 07:48:45.442942: step: 332/77, loss: 0.018455471843481064 2023-01-22 07:48:46.760474: step: 336/77, loss: 0.0010307406773790717 2023-01-22 07:48:48.060617: step: 340/77, loss: 0.0395384207367897 2023-01-22 07:48:49.313450: step: 344/77, loss: 0.031024860218167305 2023-01-22 07:48:50.654751: step: 348/77, loss: 0.007934034802019596 2023-01-22 07:48:51.984641: step: 352/77, loss: 0.020924028009176254 2023-01-22 07:48:53.271384: step: 356/77, loss: 0.007340208627283573 2023-01-22 07:48:54.522681: step: 360/77, loss: 0.026930680498480797 2023-01-22 07:48:55.793950: step: 364/77, loss: 0.017858322709798813 2023-01-22 07:48:57.142973: step: 368/77, loss: 0.018936611711978912 2023-01-22 07:48:58.428800: step: 372/77, loss: 0.0034472201950848103 2023-01-22 07:48:59.780546: step: 376/77, loss: 0.03166472539305687 2023-01-22 07:49:01.092216: step: 380/77, loss: 0.0009081726893782616 2023-01-22 07:49:02.463090: step: 384/77, loss: 0.02587328478693962 2023-01-22 07:49:03.777422: step: 388/77, loss: 0.0031165271066129208 ================================================== Loss: 0.024 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Chinese: {'template': {'p': 0.9594594594594594, 'r': 0.5772357723577236, 'f1': 0.7208121827411168}, 'slot': {'p': 0.5483870967741935, 'r': 0.015454545454545455, 'f1': 0.030061892130857647}, 'combined': 0.021668978084171505, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Korean: 
{'template': {'p': 0.958904109589041, 'r': 0.5691056910569106, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5161290322580645, 'r': 0.014545454545454545, 'f1': 0.028293545534924847}, 'combined': 0.020209675382089173, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Russian: {'template': {'p': 0.9594594594594594, 'r': 0.5772357723577236, 'f1': 0.7208121827411168}, 'slot': {'p': 0.5151515151515151, 'r': 0.015454545454545455, 'f1': 0.030008826125330977}, 'combined': 0.02163072746089847, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 11 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:50:39.617775: step: 4/77, loss: 0.00913262739777565 2023-01-22 07:50:40.872672: step: 
8/77, loss: 0.023702571168541908 2023-01-22 07:50:42.164863: step: 12/77, loss: 7.023927173577249e-05 2023-01-22 07:50:43.501610: step: 16/77, loss: 0.015598413534462452 2023-01-22 07:50:44.810613: step: 20/77, loss: 0.017305485904216766 2023-01-22 07:50:46.111644: step: 24/77, loss: 0.05371349677443504 2023-01-22 07:50:47.406765: step: 28/77, loss: 0.011336155235767365 2023-01-22 07:50:48.728569: step: 32/77, loss: 0.0017175760585814714 2023-01-22 07:50:49.998234: step: 36/77, loss: 0.00044242324656806886 2023-01-22 07:50:51.321519: step: 40/77, loss: 0.00024273883900605142 2023-01-22 07:50:52.620215: step: 44/77, loss: 0.03868889808654785 2023-01-22 07:50:53.912803: step: 48/77, loss: 0.004320107400417328 2023-01-22 07:50:55.204449: step: 52/77, loss: 0.00013211331679485738 2023-01-22 07:50:56.483464: step: 56/77, loss: 0.014553715474903584 2023-01-22 07:50:57.779454: step: 60/77, loss: 0.0039396206848323345 2023-01-22 07:50:59.092636: step: 64/77, loss: 0.008579259738326073 2023-01-22 07:51:00.405168: step: 68/77, loss: 0.02406681329011917 2023-01-22 07:51:01.726653: step: 72/77, loss: 0.026409517973661423 2023-01-22 07:51:03.013165: step: 76/77, loss: 0.02277933806180954 2023-01-22 07:51:04.330127: step: 80/77, loss: 0.014436050318181515 2023-01-22 07:51:05.642828: step: 84/77, loss: 0.012562789022922516 2023-01-22 07:51:06.949529: step: 88/77, loss: 0.00025714567163959146 2023-01-22 07:51:08.198802: step: 92/77, loss: 0.0011654815170913935 2023-01-22 07:51:09.494397: step: 96/77, loss: 7.128174183890224e-05 2023-01-22 07:51:10.771659: step: 100/77, loss: 0.0016908040270209312 2023-01-22 07:51:12.081223: step: 104/77, loss: 0.05296008661389351 2023-01-22 07:51:13.350834: step: 108/77, loss: 0.0006149305845610797 2023-01-22 07:51:14.585659: step: 112/77, loss: 0.07239086925983429 2023-01-22 07:51:15.884175: step: 116/77, loss: 0.005651980172842741 2023-01-22 07:51:17.111498: step: 120/77, loss: 0.025895126163959503 2023-01-22 07:51:18.403763: step: 124/77, loss: 0.03647073358297348 2023-01-22 07:51:19.683002: step: 128/77, loss: 3.863106030621566e-05 2023-01-22 07:51:20.987547: step: 132/77, loss: 0.009307922795414925 2023-01-22 07:51:22.254434: step: 136/77, loss: 0.0012862995499745011 2023-01-22 07:51:23.559212: step: 140/77, loss: 0.011451378464698792 2023-01-22 07:51:24.816510: step: 144/77, loss: 0.0032769562676548958 2023-01-22 07:51:26.127722: step: 148/77, loss: 0.005432584322988987 2023-01-22 07:51:27.393079: step: 152/77, loss: 0.024685127660632133 2023-01-22 07:51:28.712214: step: 156/77, loss: 0.013838456943631172 2023-01-22 07:51:29.993199: step: 160/77, loss: 0.0025978866033256054 2023-01-22 07:51:31.281186: step: 164/77, loss: 0.008826863020658493 2023-01-22 07:51:32.592219: step: 168/77, loss: 0.005616775713860989 2023-01-22 07:51:33.893678: step: 172/77, loss: 0.008297703228890896 2023-01-22 07:51:35.195808: step: 176/77, loss: 0.009488863870501518 2023-01-22 07:51:36.480811: step: 180/77, loss: 0.0032529172021895647 2023-01-22 07:51:37.762931: step: 184/77, loss: 0.0705006793141365 2023-01-22 07:51:39.050941: step: 188/77, loss: 0.008520006202161312 2023-01-22 07:51:40.341140: step: 192/77, loss: 0.0003739010135177523 2023-01-22 07:51:41.616386: step: 196/77, loss: 0.009315535426139832 2023-01-22 07:51:42.878674: step: 200/77, loss: 0.004182401578873396 2023-01-22 07:51:44.157941: step: 204/77, loss: 0.0474054291844368 2023-01-22 07:51:45.424990: step: 208/77, loss: 0.05022357776761055 2023-01-22 07:51:46.657083: step: 212/77, loss: 0.011805780231952667 2023-01-22 
07:51:47.900541: step: 216/77, loss: 0.062389228492975235 2023-01-22 07:51:49.206144: step: 220/77, loss: 0.06036899983882904 2023-01-22 07:51:50.496087: step: 224/77, loss: 0.0013627760345116258 2023-01-22 07:51:51.821224: step: 228/77, loss: 0.04754606634378433 2023-01-22 07:51:53.127457: step: 232/77, loss: 0.004517616704106331 2023-01-22 07:51:54.428054: step: 236/77, loss: 6.662487430730835e-05 2023-01-22 07:51:55.797228: step: 240/77, loss: 0.01598535105586052 2023-01-22 07:51:57.149996: step: 244/77, loss: 0.024257568642497063 2023-01-22 07:51:58.460477: step: 248/77, loss: 0.024137431755661964 2023-01-22 07:51:59.715248: step: 252/77, loss: 0.024435149505734444 2023-01-22 07:52:01.014062: step: 256/77, loss: 0.007032178808003664 2023-01-22 07:52:02.320437: step: 260/77, loss: 0.005066059995442629 2023-01-22 07:52:03.646048: step: 264/77, loss: 0.0007375497953034937 2023-01-22 07:52:04.964793: step: 268/77, loss: 0.0025754738599061966 2023-01-22 07:52:06.242063: step: 272/77, loss: 0.012038455344736576 2023-01-22 07:52:07.533079: step: 276/77, loss: 0.03843052312731743 2023-01-22 07:52:08.822970: step: 280/77, loss: 0.01894669234752655 2023-01-22 07:52:10.116339: step: 284/77, loss: 0.015208638273179531 2023-01-22 07:52:11.458056: step: 288/77, loss: 0.006000806577503681 2023-01-22 07:52:12.782182: step: 292/77, loss: 0.0017294568242505193 2023-01-22 07:52:14.076788: step: 296/77, loss: 0.0027547322679311037 2023-01-22 07:52:15.332220: step: 300/77, loss: 0.002827903488650918 2023-01-22 07:52:16.614064: step: 304/77, loss: 0.024109508842229843 2023-01-22 07:52:17.897491: step: 308/77, loss: 0.03593812137842178 2023-01-22 07:52:19.131994: step: 312/77, loss: 0.003940297290682793 2023-01-22 07:52:20.408572: step: 316/77, loss: 0.04253112152218819 2023-01-22 07:52:21.693729: step: 320/77, loss: 0.056184787303209305 2023-01-22 07:52:23.003139: step: 324/77, loss: 0.0005866457941010594 2023-01-22 07:52:24.287281: step: 328/77, loss: 0.0031070266850292683 2023-01-22 07:52:25.538844: step: 332/77, loss: 0.004228291101753712 2023-01-22 07:52:26.748483: step: 336/77, loss: 0.013488009572029114 2023-01-22 07:52:28.035312: step: 340/77, loss: 0.00031433472759090364 2023-01-22 07:52:29.344881: step: 344/77, loss: 0.01137051172554493 2023-01-22 07:52:30.662501: step: 348/77, loss: 0.02159648761153221 2023-01-22 07:52:31.933281: step: 352/77, loss: 0.0018602788913995028 2023-01-22 07:52:33.259215: step: 356/77, loss: 8.241426985478029e-05 2023-01-22 07:52:34.624251: step: 360/77, loss: 0.008178231306374073 2023-01-22 07:52:35.960922: step: 364/77, loss: 0.053727056831121445 2023-01-22 07:52:37.225022: step: 368/77, loss: 0.08346982300281525 2023-01-22 07:52:38.558944: step: 372/77, loss: 0.008330187760293484 2023-01-22 07:52:39.843766: step: 376/77, loss: 0.10757150501012802 2023-01-22 07:52:41.125247: step: 380/77, loss: 0.07443412393331528 2023-01-22 07:52:42.417908: step: 384/77, loss: 0.0016130568692460656 2023-01-22 07:52:43.742741: step: 388/77, loss: 0.03187939152121544 ================================================== Loss: 0.018 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Chinese: {'template': {'p': 0.9397590361445783, 'r': 0.6341463414634146, 'f1': 0.7572815533980584}, 'slot': {'p': 0.47619047619047616, 'r': 0.01818181818181818, 'f1': 0.0350262697022767}, 'combined': 0.026524747929879446, 'epoch': 
11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Korean: {'template': {'p': 0.9397590361445783, 'r': 0.6341463414634146, 'f1': 0.7572815533980584}, 'slot': {'p': 0.46511627906976744, 'r': 0.01818181818181818, 'f1': 0.034995625546806644}, 'combined': 0.026501541676222512, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Russian: {'template': {'p': 0.9397590361445783, 'r': 0.6341463414634146, 'f1': 0.7572815533980584}, 'slot': {'p': 0.46511627906976744, 'r': 0.01818181818181818, 'f1': 0.034995625546806644}, 'combined': 0.026501541676222512, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 12 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 
--xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:54:20.471143: step: 4/77, loss: 0.026703253388404846 2023-01-22 07:54:21.723545: step: 8/77, loss: 6.608450348721817e-05 2023-01-22 07:54:22.954591: step: 12/77, loss: 0.0021594602148979902 2023-01-22 07:54:24.211045: step: 16/77, loss: 0.0006289744051173329 2023-01-22 07:54:25.494158: step: 20/77, loss: 0.018942205235362053 2023-01-22 07:54:26.798079: step: 24/77, loss: 0.013272013515233994 2023-01-22 07:54:28.062541: step: 28/77, loss: 0.01688517816364765 2023-01-22 07:54:29.332871: step: 32/77, loss: 0.01106287818402052 2023-01-22 07:54:30.609218: step: 36/77, loss: 0.032344695180654526 2023-01-22 07:54:31.907546: step: 40/77, loss: 0.0002201721363235265 2023-01-22 07:54:33.218992: step: 44/77, loss: 0.0014146651374176145 2023-01-22 07:54:34.489961: step: 48/77, loss: 0.011268083937466145 2023-01-22 07:54:35.826796: step: 52/77, loss: 0.01278699655085802 2023-01-22 07:54:37.152212: step: 56/77, loss: 0.012389368377625942 2023-01-22 07:54:38.383697: step: 60/77, loss: 0.02331053465604782 2023-01-22 07:54:39.623921: step: 64/77, loss: 0.01478915847837925 2023-01-22 07:54:40.929350: step: 68/77, loss: 0.006758468225598335 2023-01-22 07:54:42.234586: step: 72/77, loss: 0.00636525172740221 2023-01-22 07:54:43.538540: step: 76/77, loss: 0.0365261435508728 2023-01-22 07:54:44.836792: step: 80/77, loss: 0.007071449421346188 2023-01-22 07:54:46.084212: step: 84/77, loss: 0.0159720741212368 2023-01-22 07:54:47.409637: step: 88/77, loss: 3.108736564172432e-05 2023-01-22 07:54:48.684724: step: 92/77, loss: 0.002529098652303219 2023-01-22 07:54:49.967389: step: 96/77, loss: 0.01439334824681282 2023-01-22 07:54:51.205136: step: 100/77, loss: 0.01917685568332672 2023-01-22 07:54:52.498936: step: 104/77, loss: 0.004402280319482088 2023-01-22 07:54:53.749800: step: 108/77, loss: 0.004489831626415253 2023-01-22 07:54:55.053111: step: 112/77, loss: 0.04092923924326897 2023-01-22 07:54:56.318985: step: 116/77, loss: 0.039381761103868484 2023-01-22 07:54:57.549879: step: 120/77, loss: 0.013376251794397831 2023-01-22 07:54:58.843513: step: 124/77, loss: 0.0016149815637618303 2023-01-22 07:55:00.121646: step: 128/77, loss: 0.00484588835388422 2023-01-22 07:55:01.397158: step: 132/77, loss: 0.03189440071582794 2023-01-22 07:55:02.696065: step: 136/77, loss: 0.00017303042113780975 2023-01-22 07:55:04.027266: step: 140/77, loss: 0.00938489194959402 2023-01-22 07:55:05.322841: step: 144/77, loss: 0.01806403324007988 2023-01-22 07:55:06.613945: step: 148/77, loss: 1.3843055057805032e-06 2023-01-22 07:55:07.917615: step: 152/77, loss: 0.0012917781714349985 2023-01-22 07:55:09.191520: step: 156/77, loss: 0.00010156808275496587 2023-01-22 07:55:10.515475: step: 160/77, loss: 5.2645496907643974e-05 2023-01-22 07:55:11.800431: step: 164/77, loss: 0.018050380051136017 2023-01-22 07:55:13.128830: step: 168/77, loss: 0.0017785239033401012 2023-01-22 07:55:14.418329: step: 172/77, loss: 0.004214688669890165 2023-01-22 07:55:15.675811: step: 176/77, loss: 0.004522663075476885 2023-01-22 07:55:16.998330: step: 180/77, loss: 0.004108669701963663 2023-01-22 07:55:18.264993: step: 184/77, loss: 0.005246539134532213 2023-01-22 07:55:19.548121: step: 188/77, loss: 0.03378375247120857 2023-01-22 07:55:20.832073: step: 192/77, loss: 0.01473064161837101 2023-01-22 07:55:22.104619: step: 196/77, loss: 0.12296944111585617 2023-01-22 07:55:23.392488: step: 200/77, loss: 0.038474295288324356 
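
The "Loss: 0.013" summary that closes this epoch a little further down is plausibly the arithmetic mean of the per-step losses printed above, rounded to three decimals; that is a guess consistent with the magnitudes here, not something confirmed from train.py. A small sketch under that assumption (epoch_losses would be filled with each logged loss inside the training loop):

def epoch_summary(epoch_losses):
    # Assumed bookkeeping: mean of the logged step losses,
    # printed like the separator-plus-summary lines in this log.
    print("=" * 50)
    print(f"Loss: {sum(epoch_losses) / len(epoch_losses):.3f}")

epoch_summary([0.0267, 0.0001, 0.0022])  # illustrative values -> Loss: 0.010

The same pattern would produce the "Loss: 0.025", "Loss: 0.023" and "Loss: 0.018" summaries of the earlier epochs.
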
2023-01-22 07:55:24.704305: step: 204/77, loss: 0.027129890397191048 2023-01-22 07:55:26.039810: step: 208/77, loss: 2.7496336770127527e-05 2023-01-22 07:55:27.348897: step: 212/77, loss: 0.0018721886444836855 2023-01-22 07:55:28.642853: step: 216/77, loss: 7.349484076257795e-05 2023-01-22 07:55:29.957326: step: 220/77, loss: 0.028560999780893326 2023-01-22 07:55:31.236238: step: 224/77, loss: 0.00273231347091496 2023-01-22 07:55:32.524388: step: 228/77, loss: 0.017860732972621918 2023-01-22 07:55:33.797813: step: 232/77, loss: 0.0005846356507390738 2023-01-22 07:55:35.119935: step: 236/77, loss: 0.016033103689551353 2023-01-22 07:55:36.424200: step: 240/77, loss: 5.783687447546981e-05 2023-01-22 07:55:37.761284: step: 244/77, loss: 0.012645246461033821 2023-01-22 07:55:39.039933: step: 248/77, loss: 0.01751253381371498 2023-01-22 07:55:40.324765: step: 252/77, loss: 0.0007793352706357837 2023-01-22 07:55:41.679042: step: 256/77, loss: 0.034364163875579834 2023-01-22 07:55:42.924576: step: 260/77, loss: 0.010526706464588642 2023-01-22 07:55:44.218949: step: 264/77, loss: 0.008218883536756039 2023-01-22 07:55:45.470625: step: 268/77, loss: 0.023118719458580017 2023-01-22 07:55:46.770138: step: 272/77, loss: 0.0068781073205173016 2023-01-22 07:55:48.100277: step: 276/77, loss: 0.004687698557972908 2023-01-22 07:55:49.419855: step: 280/77, loss: 0.052164994180202484 2023-01-22 07:55:50.698974: step: 284/77, loss: 0.0036531335208564997 2023-01-22 07:55:52.025979: step: 288/77, loss: 0.019436553120613098 2023-01-22 07:55:53.287736: step: 292/77, loss: 0.00011274051939835772 2023-01-22 07:55:54.598845: step: 296/77, loss: 0.01647758297622204 2023-01-22 07:55:55.883150: step: 300/77, loss: 0.009985197335481644 2023-01-22 07:55:57.236016: step: 304/77, loss: 0.0005102159921079874 2023-01-22 07:55:58.559784: step: 308/77, loss: 0.007778570055961609 2023-01-22 07:55:59.839180: step: 312/77, loss: 6.139573088148609e-05 2023-01-22 07:56:01.134340: step: 316/77, loss: 7.233645737869665e-05 2023-01-22 07:56:02.445672: step: 320/77, loss: 0.0017407573759555817 2023-01-22 07:56:03.802437: step: 324/77, loss: 0.011609593406319618 2023-01-22 07:56:05.095926: step: 328/77, loss: 0.00018634925072547048 2023-01-22 07:56:06.382348: step: 332/77, loss: 0.003662088653072715 2023-01-22 07:56:07.698242: step: 336/77, loss: 0.00013480721099767834 2023-01-22 07:56:09.028255: step: 340/77, loss: 0.02257470041513443 2023-01-22 07:56:10.365192: step: 344/77, loss: 0.04621454328298569 2023-01-22 07:56:11.689106: step: 348/77, loss: 0.0009833829244598746 2023-01-22 07:56:12.994512: step: 352/77, loss: 0.0029590395279228687 2023-01-22 07:56:14.287210: step: 356/77, loss: 0.02674620971083641 2023-01-22 07:56:15.616000: step: 360/77, loss: 0.019447756931185722 2023-01-22 07:56:16.883207: step: 364/77, loss: 0.0001418082683812827 2023-01-22 07:56:18.202328: step: 368/77, loss: 0.014620725065469742 2023-01-22 07:56:19.480826: step: 372/77, loss: 0.026280973106622696 2023-01-22 07:56:20.763424: step: 376/77, loss: 0.00032260813168250024 2023-01-22 07:56:22.020991: step: 380/77, loss: 0.0002889384631998837 2023-01-22 07:56:23.319654: step: 384/77, loss: 0.009093235246837139 2023-01-22 07:56:24.600936: step: 388/77, loss: 0.0010739528806880116 ================================================== Loss: 0.013 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} 
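
Throughout these result dicts, each f1 is the usual harmonic mean of p and r, and 'combined' equals the product of the template f1 and the slot f1: for the Dev Chinese entry just above, 0.7368421052631579 * 0.07029876977152899 = 0.05179909351586346. A short Python check (the helper names are ours; the dict layout copies the log):

def f1(p, r):
    # Harmonic mean of precision and recall.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined(result):
    # 'combined' in these logs equals template F1 times slot F1.
    return result['template']['f1'] * result['slot']['f1']

dev_chinese = {'template': {'p': 1.0, 'r': 0.5833333333333334,
                            'f1': 0.7368421052631579},
               'slot': {'p': 0.5, 'r': 0.03780718336483932,
                        'f1': 0.07029876977152899}}

print(f1(1.0, 0.5833333333333334))  # ~0.7368421052631579, as in the log
print(combined(dev_chinese))        # ~0.05179909351586346, as in the log
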
Test Chinese: {'template': {'p': 0.9863013698630136, 'r': 0.5853658536585366, 'f1': 0.7346938775510202}, 'slot': {'p': 0.4878048780487805, 'r': 0.01818181818181818, 'f1': 0.035056967572304996}, 'combined': 0.025756139440877134, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Korean: {'template': {'p': 0.9863013698630136, 'r': 0.5853658536585366, 'f1': 0.7346938775510202}, 'slot': {'p': 0.5, 'r': 0.019090909090909092, 'f1': 0.03677758318739055}, 'combined': 0.027020265198899173, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Russian: {'template': {'p': 0.9863013698630136, 'r': 0.5853658536585366, 'f1': 0.7346938775510202}, 'slot': {'p': 0.47619047619047616, 'r': 0.01818181818181818, 'f1': 0.0350262697022767}, 'combined': 0.02573358590371349, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 
0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 13 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:58:01.585767: step: 4/77, loss: 2.2610862288274802e-05 2023-01-22 07:58:02.859992: step: 8/77, loss: 0.005637861788272858 2023-01-22 07:58:04.155058: step: 12/77, loss: 0.003688807599246502 2023-01-22 07:58:05.448095: step: 16/77, loss: 0.02665168233215809 2023-01-22 07:58:06.726301: step: 20/77, loss: 0.016970016062259674 2023-01-22 07:58:08.048800: step: 24/77, loss: 1.4817988812865224e-05 2023-01-22 07:58:09.365424: step: 28/77, loss: 6.925305933691561e-05 2023-01-22 07:58:10.662509: step: 32/77, loss: 0.015504427254199982 2023-01-22 07:58:11.900188: step: 36/77, loss: 5.249499008641578e-05 2023-01-22 07:58:13.158570: step: 40/77, loss: 0.008829350583255291 2023-01-22 07:58:14.361642: step: 44/77, loss: 2.025206595135387e-05 2023-01-22 07:58:15.645557: step: 48/77, loss: 4.0971288399305195e-05 2023-01-22 07:58:16.991867: step: 52/77, loss: 0.021271025761961937 2023-01-22 07:58:18.303781: step: 56/77, loss: 0.0010673669166862965 2023-01-22 07:58:19.604202: step: 60/77, loss: 0.0002164973266189918 2023-01-22 07:58:20.872003: step: 64/77, loss: 0.06440609693527222 2023-01-22 07:58:22.171529: step: 68/77, loss: 0.03388819843530655 2023-01-22 07:58:23.445093: step: 72/77, loss: 0.014041759073734283 2023-01-22 07:58:24.746026: step: 76/77, loss: 0.023477237671613693 2023-01-22 07:58:26.017280: step: 80/77, loss: 0.0043355543166399 2023-01-22 07:58:27.257916: step: 84/77, loss: 0.007156530395150185 2023-01-22 07:58:28.518958: step: 88/77, loss: 0.00291500985622406 2023-01-22 07:58:29.819664: step: 92/77, loss: 0.007723457179963589 2023-01-22 07:58:31.067293: step: 96/77, loss: 0.00468220841139555 2023-01-22 07:58:32.336939: step: 100/77, loss: 5.247871013125405e-05 2023-01-22 07:58:33.624908: step: 104/77, loss: 4.394134066387778e-06 2023-01-22 07:58:34.931226: step: 108/77, loss: 6.272006430663168e-05 2023-01-22 07:58:36.205084: step: 112/77, loss: 0.00835472159087658 2023-01-22 07:58:37.442659: step: 116/77, loss: 0.05480552464723587 2023-01-22 07:58:38.756661: step: 120/77, loss: 0.022276371717453003 2023-01-22 07:58:40.121105: step: 124/77, loss: 0.002669721841812134 2023-01-22 07:58:41.420132: step: 128/77, loss: 0.09122525900602341 2023-01-22 07:58:42.749486: step: 132/77, loss: 0.010938310995697975 2023-01-22 07:58:44.052499: step: 136/77, loss: 4.7641540732001886e-05 2023-01-22 07:58:45.400755: step: 140/77, loss: 8.525445446139202e-05 2023-01-22 07:58:46.691465: step: 144/77, loss: 0.008205310441553593 2023-01-22 07:58:48.007922: step: 148/77, loss: 0.02473701536655426 2023-01-22 07:58:49.295176: step: 152/77, loss: 0.025928953662514687 2023-01-22 07:58:50.567428: step: 156/77, loss: 0.0324617400765419 2023-01-22 07:58:51.859262: step: 160/77, loss: 0.0030810926109552383 2023-01-22 07:58:53.211051: step: 164/77, loss: 0.022286182269454002 2023-01-22 07:58:54.514026: step: 168/77, loss: 0.021500881761312485 2023-01-22 07:58:55.837995: step: 172/77, loss: 0.009073024615645409 2023-01-22 07:58:57.105823: step: 176/77, loss: 0.0035487054847180843 2023-01-22 07:58:58.403962: step: 180/77, loss: 0.002166708232834935 2023-01-22 07:58:59.706028: step: 184/77, loss: 0.01816055364906788 2023-01-22 07:59:01.020754: step: 
188/77, loss: 0.05229934677481651 2023-01-22 07:59:02.329783: step: 192/77, loss: 0.004905913025140762 2023-01-22 07:59:03.593649: step: 196/77, loss: 0.04279464855790138 2023-01-22 07:59:04.884848: step: 200/77, loss: 1.9669514017550682e-07 2023-01-22 07:59:06.194643: step: 204/77, loss: 0.025499440729618073 2023-01-22 07:59:07.486359: step: 208/77, loss: 0.009133759886026382 2023-01-22 07:59:08.830497: step: 212/77, loss: 0.011703042313456535 2023-01-22 07:59:10.139731: step: 216/77, loss: 0.0017015681369230151 2023-01-22 07:59:11.452853: step: 220/77, loss: 0.03688368201255798 2023-01-22 07:59:12.743385: step: 224/77, loss: 0.03645056113600731 2023-01-22 07:59:14.034375: step: 228/77, loss: 0.022262584418058395 2023-01-22 07:59:15.340423: step: 232/77, loss: 0.00811818242073059 2023-01-22 07:59:16.676925: step: 236/77, loss: 0.05509474128484726 2023-01-22 07:59:17.951559: step: 240/77, loss: 0.08739019930362701 2023-01-22 07:59:19.246078: step: 244/77, loss: 0.03550776094198227 2023-01-22 07:59:20.552648: step: 248/77, loss: 0.0076720230281353 2023-01-22 07:59:21.846539: step: 252/77, loss: 0.03817453980445862 2023-01-22 07:59:23.159261: step: 256/77, loss: 0.00017829393618740141 2023-01-22 07:59:24.477645: step: 260/77, loss: 1.9056222299695946e-05 2023-01-22 07:59:25.759703: step: 264/77, loss: 0.040975864976644516 2023-01-22 07:59:27.075459: step: 268/77, loss: 0.0011579713318496943 2023-01-22 07:59:28.335031: step: 272/77, loss: 0.025947313755750656 2023-01-22 07:59:29.592314: step: 276/77, loss: 0.0001307231286773458 2023-01-22 07:59:30.855108: step: 280/77, loss: 0.03271069750189781 2023-01-22 07:59:32.143582: step: 284/77, loss: 0.015176949091255665 2023-01-22 07:59:33.442208: step: 288/77, loss: 0.0007837112061679363 2023-01-22 07:59:34.771997: step: 292/77, loss: 0.05998732149600983 2023-01-22 07:59:36.099503: step: 296/77, loss: 0.022658394649624825 2023-01-22 07:59:37.347543: step: 300/77, loss: 0.007944842800498009 2023-01-22 07:59:38.647686: step: 304/77, loss: 0.03138115629553795 2023-01-22 07:59:39.958765: step: 308/77, loss: 0.003645453369244933 2023-01-22 07:59:41.239294: step: 312/77, loss: 0.0005130755598656833 2023-01-22 07:59:42.537654: step: 316/77, loss: 0.008485173806548119 2023-01-22 07:59:43.813394: step: 320/77, loss: 8.350548159796745e-05 2023-01-22 07:59:45.103970: step: 324/77, loss: 0.014045147225260735 2023-01-22 07:59:46.386387: step: 328/77, loss: 0.002589387120679021 2023-01-22 07:59:47.693181: step: 332/77, loss: 1.4230943634174764e-05 2023-01-22 07:59:49.037400: step: 336/77, loss: 0.00667186314240098 2023-01-22 07:59:50.330671: step: 340/77, loss: 0.01891062594950199 2023-01-22 07:59:51.661023: step: 344/77, loss: 0.04488614574074745 2023-01-22 07:59:52.962345: step: 348/77, loss: 0.019665377214550972 2023-01-22 07:59:54.308867: step: 352/77, loss: 0.007895408198237419 2023-01-22 07:59:55.656439: step: 356/77, loss: 0.001936072949320078 2023-01-22 07:59:56.974576: step: 360/77, loss: 0.036314986646175385 2023-01-22 07:59:58.240554: step: 364/77, loss: 0.002707503968849778 2023-01-22 07:59:59.518990: step: 368/77, loss: 0.006123952567577362 2023-01-22 08:00:00.833362: step: 372/77, loss: 0.05331498384475708 2023-01-22 08:00:02.159613: step: 376/77, loss: 0.009307156316936016 2023-01-22 08:00:03.429294: step: 380/77, loss: 0.00793666671961546 2023-01-22 08:00:04.715751: step: 384/77, loss: 0.07914161682128906 2023-01-22 08:00:06.050179: step: 388/77, loss: 0.04725559428334236
==================================================
Loss: 0.018
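Each epoch's closing "Loss:" line is plausibly the mean of the step losses logged above it, rounded to three decimals; that matches the layout of this log but is an assumption, since train.py is not shown here. Note also that the step counter advances by 4, matching --accumulate_step 4, so with --batch_size 10 each logged entry would correspond to an effective batch of 40 examples. A small parser (with a hypothetical file name "train.log") to recover the loss curve:

import re

# Matches entries like "step: 188/77, loss: 0.05229934677481651",
# including losses in scientific notation such as 1.9669514017550682e-07.
# The capitalized epoch summary "Loss: 0.018" is deliberately not matched.
STEP_RE = re.compile(r"step: (\d+)/\d+, loss: ([0-9eE.+-]+)")

def step_losses(log_text: str) -> list[tuple[int, float]]:
    # One (step, loss) pair per logged training step.
    return [(int(s), float(l)) for s, l in STEP_RE.findall(log_text)]

with open("train.log", encoding="utf-8") as fh:   # hypothetical path
    entries = step_losses(fh.read())
if entries:
    mean = sum(loss for _, loss in entries) / len(entries)
    print(f"{len(entries)} steps, mean loss {mean:.3f}")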
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13}
Test Chinese: {'template': {'p': 0.95, 'r': 0.6178861788617886, 'f1': 0.748768472906404}, 'slot': {'p': 0.5, 'r': 0.025454545454545455, 'f1': 0.04844290657439446}, 'combined': 0.036272521178856945, 'epoch': 13}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13}
Test Korean: {'template': {'p': 0.95, 'r': 0.6178861788617886, 'f1': 0.748768472906404}, 'slot': {'p': 0.49122807017543857, 'r': 0.025454545454545455, 'f1': 0.0484010371650821}, 'combined': 0.036241170685184634, 'epoch': 13}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13}
Test Russian: {'template': {'p': 0.95, 'r': 0.6178861788617886, 'f1': 0.748768472906404}, 'slot': {'p': 0.5087719298245614, 'r': 0.026363636363636363, 'f1': 0.05012964563526361}, 'combined': 0.037535498209655516, 'epoch': 13}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 13}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 13}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5}
Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5}
Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5}
Test for Russian: {'template': {'p':
'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 14 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:01:42.972240: step: 4/77, loss: 0.004430257249623537 2023-01-22 08:01:44.279941: step: 8/77, loss: 0.0018078071298077703 2023-01-22 08:01:45.535686: step: 12/77, loss: 0.0023698084987699986 2023-01-22 08:01:46.815426: step: 16/77, loss: 0.015932057052850723 2023-01-22 08:01:48.136363: step: 20/77, loss: 0.00013169035082682967 2023-01-22 08:01:49.377977: step: 24/77, loss: 0.0037448059301823378 2023-01-22 08:01:50.693850: step: 28/77, loss: 0.03657084330916405 2023-01-22 08:01:51.943316: step: 32/77, loss: 0.04973209649324417 2023-01-22 08:01:53.246833: step: 36/77, loss: 0.026404602453112602 2023-01-22 08:01:54.551206: step: 40/77, loss: 0.00232500908896327 2023-01-22 08:01:55.839728: step: 44/77, loss: 0.01054262463003397 2023-01-22 08:01:57.086465: step: 48/77, loss: 0.008980286307632923 2023-01-22 08:01:58.362970: step: 52/77, loss: 0.01709909178316593 2023-01-22 08:01:59.638362: step: 56/77, loss: 0.0010604806011542678 2023-01-22 08:02:00.956351: step: 60/77, loss: 0.0008716561715118587 2023-01-22 08:02:02.199014: step: 64/77, loss: 0.0012432102812454104 2023-01-22 08:02:03.548317: step: 68/77, loss: 0.05664519965648651 2023-01-22 08:02:04.853240: step: 72/77, loss: 0.06241287291049957 2023-01-22 08:02:06.143074: step: 76/77, loss: 0.00022028060629963875 2023-01-22 08:02:07.434299: step: 80/77, loss: 0.08272923529148102 2023-01-22 08:02:08.764465: step: 84/77, loss: 0.010699925944209099 2023-01-22 08:02:10.027393: step: 88/77, loss: 0.05161707103252411 2023-01-22 08:02:11.339264: step: 92/77, loss: 0.005692289210855961 2023-01-22 08:02:12.687436: step: 96/77, loss: 0.0022551261354237795 2023-01-22 08:02:13.966806: step: 100/77, loss: 0.0010835323482751846 2023-01-22 08:02:15.253163: step: 104/77, loss: 0.00023391967988573015 2023-01-22 08:02:16.550594: step: 108/77, loss: 0.04434645548462868 2023-01-22 08:02:17.876519: step: 112/77, loss: 0.013929794542491436 2023-01-22 08:02:19.203028: step: 116/77, loss: 0.012213967740535736 2023-01-22 08:02:20.527728: step: 120/77, loss: 0.04906335845589638 2023-01-22 08:02:21.874073: step: 124/77, loss: 0.004624365828931332 2023-01-22 08:02:23.163971: step: 128/77, loss: 0.0008969166083261371 2023-01-22 08:02:24.502929: step: 132/77, loss: 0.041873492300510406 2023-01-22 08:02:25.794309: step: 136/77, loss: 0.0015139449387788773 2023-01-22 08:02:27.015425: step: 140/77, loss: 0.008322593756020069 2023-01-22 08:02:28.320653: step: 144/77, loss: 0.03902991861104965 2023-01-22 08:02:29.640699: step: 148/77, loss: 0.016263600438833237 2023-01-22 08:02:30.963410: step: 152/77, loss: 0.009588202461600304 2023-01-22 08:02:32.241751: step: 156/77, loss: 0.00011158635606989264 2023-01-22 08:02:33.519501: step: 160/77, loss: 0.020438387989997864 2023-01-22 08:02:34.811168: step: 164/77, loss: 0.04943925142288208 2023-01-22 08:02:36.108694: step: 168/77, loss: 0.008242144249379635 2023-01-22 08:02:37.408522: step: 172/77, loss: 0.008970575407147408 2023-01-22 08:02:38.675369: step: 176/77, loss: 3.817386459559202e-05 
2023-01-22 08:02:39.982081: step: 180/77, loss: 0.04439555108547211 2023-01-22 08:02:41.313263: step: 184/77, loss: 0.007772160694003105 2023-01-22 08:02:42.602051: step: 188/77, loss: 0.0020781750790774822 2023-01-22 08:02:43.958042: step: 192/77, loss: 1.6847297956701368e-05 2023-01-22 08:02:45.241099: step: 196/77, loss: 0.021913882344961166 2023-01-22 08:02:46.593101: step: 200/77, loss: 0.0016695652157068253 2023-01-22 08:02:47.918608: step: 204/77, loss: 0.007512289099395275 2023-01-22 08:02:49.233334: step: 208/77, loss: 0.0001379475143039599 2023-01-22 08:02:50.520227: step: 212/77, loss: 0.012302246876060963 2023-01-22 08:02:51.789366: step: 216/77, loss: 0.0013374080881476402 2023-01-22 08:02:53.063756: step: 220/77, loss: 0.0029978842940181494 2023-01-22 08:02:54.323700: step: 224/77, loss: 0.012731088325381279 2023-01-22 08:02:55.630604: step: 228/77, loss: 0.0037896474823355675 2023-01-22 08:02:56.952012: step: 232/77, loss: 0.007226157002151012 2023-01-22 08:02:58.240854: step: 236/77, loss: 0.010333065874874592 2023-01-22 08:02:59.513714: step: 240/77, loss: 0.025543566793203354 2023-01-22 08:03:00.767298: step: 244/77, loss: 0.003890481311827898 2023-01-22 08:03:02.100713: step: 248/77, loss: 0.0061135608702898026 2023-01-22 08:03:03.353036: step: 252/77, loss: 0.0002315741148777306 2023-01-22 08:03:04.627725: step: 256/77, loss: 0.022473495453596115 2023-01-22 08:03:05.935296: step: 260/77, loss: 0.0070287445560097694 2023-01-22 08:03:07.217083: step: 264/77, loss: 0.0042348294518888 2023-01-22 08:03:08.497318: step: 268/77, loss: 0.008537882007658482 2023-01-22 08:03:09.818279: step: 272/77, loss: 0.00023712392430752516 2023-01-22 08:03:11.112401: step: 276/77, loss: 0.016111375764012337 2023-01-22 08:03:12.437267: step: 280/77, loss: 0.023547686636447906 2023-01-22 08:03:13.781196: step: 284/77, loss: 0.09503351151943207 2023-01-22 08:03:15.059125: step: 288/77, loss: 7.513206946896389e-05 2023-01-22 08:03:16.377317: step: 292/77, loss: 0.004765697754919529 2023-01-22 08:03:17.686004: step: 296/77, loss: 0.003875546855852008 2023-01-22 08:03:18.985044: step: 300/77, loss: 0.0038535785861313343 2023-01-22 08:03:20.284639: step: 304/77, loss: 0.003971458412706852 2023-01-22 08:03:21.627483: step: 308/77, loss: 0.020400844514369965 2023-01-22 08:03:22.919210: step: 312/77, loss: 0.00289251864887774 2023-01-22 08:03:24.281891: step: 316/77, loss: 0.0013335293624550104 2023-01-22 08:03:25.598557: step: 320/77, loss: 6.220525392564014e-06 2023-01-22 08:03:26.907332: step: 324/77, loss: 0.00018553411064203829 2023-01-22 08:03:28.255621: step: 328/77, loss: 0.02815510518848896 2023-01-22 08:03:29.533062: step: 332/77, loss: 1.5467263665414066e-06 2023-01-22 08:03:30.807954: step: 336/77, loss: 0.022374749183654785 2023-01-22 08:03:32.094729: step: 340/77, loss: 6.251667946344241e-05 2023-01-22 08:03:33.392494: step: 344/77, loss: 0.0003971634723711759 2023-01-22 08:03:34.701476: step: 348/77, loss: 0.048008985817432404 2023-01-22 08:03:36.033866: step: 352/77, loss: 0.06184585392475128 2023-01-22 08:03:37.335862: step: 356/77, loss: 0.000282044435152784 2023-01-22 08:03:38.651251: step: 360/77, loss: 0.000977915246039629 2023-01-22 08:03:39.914979: step: 364/77, loss: 0.055823519825935364 2023-01-22 08:03:41.256015: step: 368/77, loss: 0.0415327325463295 2023-01-22 08:03:42.570813: step: 372/77, loss: 0.08297859877347946 2023-01-22 08:03:43.893931: step: 376/77, loss: 0.016023965552449226 2023-01-22 08:03:45.146870: step: 380/77, loss: 0.01686960645020008 2023-01-22 
08:03:46.485587: step: 384/77, loss: 0.0008093234500847757 2023-01-22 08:03:47.771408: step: 388/77, loss: 0.04213680326938629
==================================================
Loss: 0.016
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 14}
Test Chinese: {'template': {'p': 0.9240506329113924, 'r': 0.5934959349593496, 'f1': 0.7227722772277229}, 'slot': {'p': 0.43636363636363634, 'r': 0.02181818181818182, 'f1': 0.041558441558441565}, 'combined': 0.030037289443230045, 'epoch': 14}
Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 14}
Test Korean: {'template': {'p': 0.925, 'r': 0.6016260162601627, 'f1': 0.7290640394088671}, 'slot': {'p': 0.43636363636363634, 'r': 0.02181818181818182, 'f1': 0.041558441558441565}, 'combined': 0.03029876527413474, 'epoch': 14}
Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 14}
Test Russian: {'template': {'p': 0.925, 'r': 0.6016260162601627, 'f1': 0.7290640394088671}, 'slot': {'p': 0.43636363636363634, 'r': 0.02181818181818182, 'f1': 0.041558441558441565}, 'combined': 0.03029876527413474, 'epoch': 14}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 14}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14}
Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5}
Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5}
Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5}
Test for Russian: {'template': {'p':
0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 15 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:05:24.268982: step: 4/77, loss: 0.0011530212359502912 2023-01-22 08:05:25.556670: step: 8/77, loss: 0.006455153692513704 2023-01-22 08:05:26.826725: step: 12/77, loss: 0.03983420133590698 2023-01-22 08:05:28.136676: step: 16/77, loss: 0.011238010600209236 2023-01-22 08:05:29.414213: step: 20/77, loss: 0.0026525515131652355 2023-01-22 08:05:30.720367: step: 24/77, loss: 0.012394571676850319 2023-01-22 08:05:32.040995: step: 28/77, loss: 0.01089528203010559 2023-01-22 08:05:33.330335: step: 32/77, loss: 0.002410109620541334 2023-01-22 08:05:34.629859: step: 36/77, loss: 0.07191843539476395 2023-01-22 08:05:35.915309: step: 40/77, loss: 0.04111922159790993 2023-01-22 08:05:37.148326: step: 44/77, loss: 0.006325423717498779 2023-01-22 08:05:38.423070: step: 48/77, loss: 0.0018762395484372973 2023-01-22 08:05:39.745867: step: 52/77, loss: 0.016316326335072517 2023-01-22 08:05:41.047681: step: 56/77, loss: 0.021863887086510658 2023-01-22 08:05:42.348857: step: 60/77, loss: 0.004629537463188171 2023-01-22 08:05:43.647501: step: 64/77, loss: 0.05812714248895645 2023-01-22 08:05:44.984995: step: 68/77, loss: 0.0006646717665717006 2023-01-22 08:05:46.271549: step: 72/77, loss: 0.025584321469068527 2023-01-22 08:05:47.542167: step: 76/77, loss: 0.005306669045239687 2023-01-22 08:05:48.812272: step: 80/77, loss: 0.00021504539472516626 2023-01-22 08:05:50.104286: step: 84/77, loss: 0.03863883763551712 2023-01-22 08:05:51.361343: step: 88/77, loss: 0.013125048018991947 2023-01-22 08:05:52.696161: step: 92/77, loss: 0.0073471637442708015 2023-01-22 08:05:54.004793: step: 96/77, loss: 0.0024326976854354143 2023-01-22 08:05:55.336160: step: 100/77, loss: 0.014627272263169289 2023-01-22 08:05:56.610462: step: 104/77, loss: 0.03934137895703316 2023-01-22 08:05:57.932595: step: 108/77, loss: 0.0004098295175936073 2023-01-22 08:05:59.211985: step: 112/77, loss: 0.0012081761378794909 2023-01-22 08:06:00.488734: step: 116/77, loss: 0.008219428360462189 2023-01-22 08:06:01.775687: step: 120/77, loss: 0.0010092520387843251 2023-01-22 08:06:03.013655: step: 124/77, loss: 0.02881784364581108 2023-01-22 08:06:04.272815: step: 128/77, loss: 0.00022574173635803163 2023-01-22 08:06:05.577152: step: 132/77, loss: 0.014033297076821327 2023-01-22 08:06:06.844179: step: 136/77, loss: 0.00023875743499957025 2023-01-22 08:06:08.135715: step: 140/77, loss: 0.012619758024811745 2023-01-22 08:06:09.430344: step: 144/77, loss: 0.007678681518882513 2023-01-22 08:06:10.679286: step: 148/77, loss: 0.0004265752504579723 2023-01-22 08:06:11.969926: step: 152/77, loss: 0.047454044222831726 2023-01-22 08:06:13.239688: step: 156/77, loss: 0.0020707775838673115 2023-01-22 08:06:14.539024: step: 160/77, loss: 3.5891003790311515e-05 2023-01-22 08:06:15.902399: step: 164/77, loss: 0.014533540233969688 2023-01-22 08:06:17.140735: step: 168/77, loss: 0.0010304294992238283 
2023-01-22 08:06:18.408055: step: 172/77, loss: 0.0004204391734674573 2023-01-22 08:06:19.693650: step: 176/77, loss: 0.0028548361733555794 2023-01-22 08:06:20.988733: step: 180/77, loss: 0.0018736727070063353 2023-01-22 08:06:22.290274: step: 184/77, loss: 5.218083970248699e-06 2023-01-22 08:06:23.582507: step: 188/77, loss: 0.012196572497487068 2023-01-22 08:06:24.835362: step: 192/77, loss: 0.03711218386888504 2023-01-22 08:06:26.165599: step: 196/77, loss: 0.019275743514299393 2023-01-22 08:06:27.485206: step: 200/77, loss: 0.001391243189573288 2023-01-22 08:06:28.795371: step: 204/77, loss: 0.004645006265491247 2023-01-22 08:06:30.086169: step: 208/77, loss: 0.00014529861800838262 2023-01-22 08:06:31.375423: step: 212/77, loss: 0.0007189132156781852 2023-01-22 08:06:32.675442: step: 216/77, loss: 0.0036826361902058125 2023-01-22 08:06:34.008890: step: 220/77, loss: 0.016211818903684616 2023-01-22 08:06:35.330058: step: 224/77, loss: 0.008424910716712475 2023-01-22 08:06:36.602479: step: 228/77, loss: 0.005087016150355339 2023-01-22 08:06:37.904397: step: 232/77, loss: 0.0021495078690350056 2023-01-22 08:06:39.173753: step: 236/77, loss: 0.016692375764250755 2023-01-22 08:06:40.465024: step: 240/77, loss: 0.0010836259461939335 2023-01-22 08:06:41.764153: step: 244/77, loss: 8.891993638826534e-05 2023-01-22 08:06:43.044031: step: 248/77, loss: 0.0019844304770231247 2023-01-22 08:06:44.350112: step: 252/77, loss: 0.008615804836153984 2023-01-22 08:06:45.631312: step: 256/77, loss: 0.00010830286919372156 2023-01-22 08:06:46.902399: step: 260/77, loss: 0.009233257733285427 2023-01-22 08:06:48.258407: step: 264/77, loss: 0.01212453655898571 2023-01-22 08:06:49.547943: step: 268/77, loss: 0.0026560192927718163 2023-01-22 08:06:50.823298: step: 272/77, loss: 0.006347885821014643 2023-01-22 08:06:52.104766: step: 276/77, loss: 0.0009034523391164839 2023-01-22 08:06:53.437624: step: 280/77, loss: 0.003021764103323221 2023-01-22 08:06:54.766311: step: 284/77, loss: 0.006079188548028469 2023-01-22 08:06:56.059027: step: 288/77, loss: 0.038373105227947235 2023-01-22 08:06:57.345718: step: 292/77, loss: 0.0031433424446731806 2023-01-22 08:06:58.651099: step: 296/77, loss: 0.02784053236246109 2023-01-22 08:06:59.956246: step: 300/77, loss: 0.0005230466485954821 2023-01-22 08:07:01.276511: step: 304/77, loss: 0.00455853994935751 2023-01-22 08:07:02.544727: step: 308/77, loss: 0.0012094294652342796 2023-01-22 08:07:03.889297: step: 312/77, loss: 0.0003353256033733487 2023-01-22 08:07:05.180887: step: 316/77, loss: 0.00011404056567698717 2023-01-22 08:07:06.485019: step: 320/77, loss: 0.1456245481967926 2023-01-22 08:07:07.785984: step: 324/77, loss: 0.0008838959038257599 2023-01-22 08:07:09.081483: step: 328/77, loss: 0.04460209980607033 2023-01-22 08:07:10.371605: step: 332/77, loss: 0.0077811977826058865 2023-01-22 08:07:11.656877: step: 336/77, loss: 0.000999920885078609 2023-01-22 08:07:12.940255: step: 340/77, loss: 0.0008804184617474675 2023-01-22 08:07:14.225682: step: 344/77, loss: 0.03426405042409897 2023-01-22 08:07:15.538700: step: 348/77, loss: 0.00249478523619473 2023-01-22 08:07:16.871401: step: 352/77, loss: 0.028345339000225067 2023-01-22 08:07:18.160216: step: 356/77, loss: 1.6141852029250003e-05 2023-01-22 08:07:19.473891: step: 360/77, loss: 0.004599923733621836 2023-01-22 08:07:20.720451: step: 364/77, loss: 0.0015322489198297262 2023-01-22 08:07:22.045852: step: 368/77, loss: 0.029466429725289345 2023-01-22 08:07:23.405906: step: 372/77, loss: 0.0002610405208542943 2023-01-22 
08:07:24.703690: step: 376/77, loss: 0.00028324045706540346 2023-01-22 08:07:25.969261: step: 380/77, loss: 0.00011046537838410586 2023-01-22 08:07:27.285210: step: 384/77, loss: 0.01054134126752615 2023-01-22 08:07:28.615346: step: 388/77, loss: 0.0020199620630592108
==================================================
Loss: 0.012
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15}
Test Chinese: {'template': {'p': 0.8974358974358975, 'r': 0.5691056910569106, 'f1': 0.6965174129353233}, 'slot': {'p': 0.4583333333333333, 'r': 0.02, 'f1': 0.03832752613240418}, 'combined': 0.026695789345953156, 'epoch': 15}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15}
Test Korean: {'template': {'p': 0.8974358974358975, 'r': 0.5691056910569106, 'f1': 0.6965174129353233}, 'slot': {'p': 0.45652173913043476, 'r': 0.019090909090909092, 'f1': 0.03664921465968587}, 'combined': 0.025526816180875725, 'epoch': 15}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15}
Test Russian: {'template': {'p': 0.8961038961038961, 'r': 0.5609756097560976, 'f1': 0.69}, 'slot': {'p': 0.46808510638297873, 'r': 0.02, 'f1': 0.03836094158674804}, 'combined': 0.026469049694856146, 'epoch': 15}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 15}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 15}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5}
Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5}
Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5}
--------------------
Dev for Russian: {'template': {'p':
1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 16 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:09:05.525025: step: 4/77, loss: 0.006552703212946653 2023-01-22 08:09:06.849060: step: 8/77, loss: 0.0009742131805978715 2023-01-22 08:09:08.148064: step: 12/77, loss: 0.0017733649583533406 2023-01-22 08:09:09.488100: step: 16/77, loss: 0.03460262715816498 2023-01-22 08:09:10.809333: step: 20/77, loss: 0.013664139434695244 2023-01-22 08:09:12.124795: step: 24/77, loss: 6.580878107342869e-05 2023-01-22 08:09:13.403592: step: 28/77, loss: 0.011652027256786823 2023-01-22 08:09:14.671286: step: 32/77, loss: 0.02981100231409073 2023-01-22 08:09:15.954305: step: 36/77, loss: 0.018231457099318504 2023-01-22 08:09:17.228395: step: 40/77, loss: 0.009962860494852066 2023-01-22 08:09:18.451424: step: 44/77, loss: 0.01126846019178629 2023-01-22 08:09:19.696281: step: 48/77, loss: 1.236806383531075e-05 2023-01-22 08:09:20.975592: step: 52/77, loss: 0.0025686314329504967 2023-01-22 08:09:22.244689: step: 56/77, loss: 0.022444654256105423 2023-01-22 08:09:23.528000: step: 60/77, loss: 8.586797775933519e-06 2023-01-22 08:09:24.801821: step: 64/77, loss: 6.882055458845571e-06 2023-01-22 08:09:26.112974: step: 68/77, loss: 0.004656787030398846 2023-01-22 08:09:27.369997: step: 72/77, loss: 0.005976193118840456 2023-01-22 08:09:28.628631: step: 76/77, loss: 0.003050893312320113 2023-01-22 08:09:29.925445: step: 80/77, loss: 0.11868611723184586 2023-01-22 08:09:31.210895: step: 84/77, loss: 0.022080106660723686 2023-01-22 08:09:32.450157: step: 88/77, loss: 0.005611381493508816 2023-01-22 08:09:33.748779: step: 92/77, loss: 0.0006953682750463486 2023-01-22 08:09:35.086916: step: 96/77, loss: 0.003470786614343524 2023-01-22 08:09:36.414785: step: 100/77, loss: 0.00038696586852893233 2023-01-22 08:09:37.629595: step: 104/77, loss: 0.04702460765838623 2023-01-22 08:09:38.891722: step: 108/77, loss: 0.00037538877222687006 2023-01-22 08:09:40.223809: step: 112/77, loss: 0.0009814082877710462 2023-01-22 08:09:41.503953: step: 116/77, loss: 0.00010789869702421129 2023-01-22 08:09:42.826469: step: 120/77, loss: 0.02089921198785305 2023-01-22 08:09:44.151306: step: 124/77, loss: 0.014739819802343845 2023-01-22 08:09:45.420125: step: 128/77, loss: 0.001180226681753993 2023-01-22 08:09:46.731797: step: 132/77, loss: 0.04211696237325668 2023-01-22 08:09:48.006478: step: 136/77, loss: 3.5734439734369516e-05 2023-01-22 08:09:49.324340: step: 140/77, loss: 0.04853532090783119 2023-01-22 08:09:50.579704: step: 144/77, loss: 0.004052129108458757 2023-01-22 08:09:51.870954: step: 148/77, loss: 0.0004372121475171298 2023-01-22 08:09:53.152300: step: 152/77, loss: 0.04190046712756157 2023-01-22 08:09:54.455200: step: 156/77, loss: 0.0018922454910352826 
2023-01-22 08:09:55.745641: step: 160/77, loss: 0.009773260913789272 2023-01-22 08:09:57.045035: step: 164/77, loss: 0.001528030028566718 2023-01-22 08:09:58.348950: step: 168/77, loss: 0.02765164151787758 2023-01-22 08:09:59.692183: step: 172/77, loss: 0.0005780403153039515 2023-01-22 08:10:01.017222: step: 176/77, loss: 0.0038615819066762924 2023-01-22 08:10:02.286569: step: 180/77, loss: 0.014504548162221909 2023-01-22 08:10:03.611962: step: 184/77, loss: 0.007875760085880756 2023-01-22 08:10:04.907776: step: 188/77, loss: 0.0010432120179757476 2023-01-22 08:10:06.173363: step: 192/77, loss: 1.9311471533001168e-06 2023-01-22 08:10:07.490255: step: 196/77, loss: 0.008792483247816563 2023-01-22 08:10:08.778913: step: 200/77, loss: 0.033647846430540085 2023-01-22 08:10:10.097313: step: 204/77, loss: 8.60294239828363e-05 2023-01-22 08:10:11.419116: step: 208/77, loss: 0.0026480434462428093 2023-01-22 08:10:12.704259: step: 212/77, loss: 0.0073967669159173965 2023-01-22 08:10:14.002429: step: 216/77, loss: 0.027375243604183197 2023-01-22 08:10:15.265171: step: 220/77, loss: 0.0054856291972100735 2023-01-22 08:10:16.519894: step: 224/77, loss: 0.024888327345252037 2023-01-22 08:10:17.795445: step: 228/77, loss: 0.0061912983655929565 2023-01-22 08:10:19.136608: step: 232/77, loss: 0.004951823502779007 2023-01-22 08:10:20.480869: step: 236/77, loss: 0.002676447154954076 2023-01-22 08:10:21.814223: step: 240/77, loss: 4.45177975052502e-05 2023-01-22 08:10:23.065784: step: 244/77, loss: 0.010543622076511383 2023-01-22 08:10:24.365904: step: 248/77, loss: 0.02687632292509079 2023-01-22 08:10:25.644517: step: 252/77, loss: 0.0023010619916021824 2023-01-22 08:10:26.935346: step: 256/77, loss: 0.07297077775001526 2023-01-22 08:10:28.253172: step: 260/77, loss: 0.0007688794867135584 2023-01-22 08:10:29.543868: step: 264/77, loss: 7.96151434769854e-05 2023-01-22 08:10:30.884642: step: 268/77, loss: 0.00010676166857592762 2023-01-22 08:10:32.191735: step: 272/77, loss: 0.011700259521603584 2023-01-22 08:10:33.508567: step: 276/77, loss: 0.010706624016165733 2023-01-22 08:10:34.822260: step: 280/77, loss: 0.00010317780106561258 2023-01-22 08:10:36.099118: step: 284/77, loss: 0.027786539867520332 2023-01-22 08:10:37.413325: step: 288/77, loss: 0.001600670162588358 2023-01-22 08:10:38.719368: step: 292/77, loss: 0.0003801693383138627 2023-01-22 08:10:39.995912: step: 296/77, loss: 0.0010835555149242282 2023-01-22 08:10:41.292658: step: 300/77, loss: 0.0013048271648585796 2023-01-22 08:10:42.537332: step: 304/77, loss: 0.00392378494143486 2023-01-22 08:10:43.801686: step: 308/77, loss: 2.722800854826346e-05 2023-01-22 08:10:45.078109: step: 312/77, loss: 0.01019936054944992 2023-01-22 08:10:46.382346: step: 316/77, loss: 0.04807935282588005 2023-01-22 08:10:47.656633: step: 320/77, loss: 3.8343645428540185e-05 2023-01-22 08:10:48.970186: step: 324/77, loss: 0.00013135296467225999 2023-01-22 08:10:50.286106: step: 328/77, loss: 0.012444362044334412 2023-01-22 08:10:51.563327: step: 332/77, loss: 0.0005562487640418112 2023-01-22 08:10:52.830183: step: 336/77, loss: 0.0027817520312964916 2023-01-22 08:10:54.173231: step: 340/77, loss: 0.04739411547780037 2023-01-22 08:10:55.456317: step: 344/77, loss: 0.015705594792962074 2023-01-22 08:10:56.745570: step: 348/77, loss: 0.014253773726522923 2023-01-22 08:10:58.054575: step: 352/77, loss: 0.0039000764954835176 2023-01-22 08:10:59.368566: step: 356/77, loss: 0.04627962410449982 2023-01-22 08:11:00.628510: step: 360/77, loss: 0.033766452223062515 2023-01-22 
08:11:01.871261: step: 364/77, loss: 0.0040741246193647385 2023-01-22 08:11:03.215849: step: 368/77, loss: 0.002382186008617282 2023-01-22 08:11:04.467194: step: 372/77, loss: 0.0462690070271492 2023-01-22 08:11:05.729243: step: 376/77, loss: 0.000892925076186657 2023-01-22 08:11:07.019846: step: 380/77, loss: 0.01855640299618244 2023-01-22 08:11:08.301258: step: 384/77, loss: 0.0007892133435234427 2023-01-22 08:11:09.602964: step: 388/77, loss: 0.05648082494735718
==================================================
Loss: 0.013
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 16}
Test Chinese: {'template': {'p': 0.961038961038961, 'r': 0.6016260162601627, 'f1': 0.74}, 'slot': {'p': 0.4722222222222222, 'r': 0.015454545454545455, 'f1': 0.029929577464788734}, 'combined': 0.02214788732394366, 'epoch': 16}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 16}
Test Korean: {'template': {'p': 0.9615384615384616, 'r': 0.6097560975609756, 'f1': 0.7462686567164178}, 'slot': {'p': 0.4722222222222222, 'r': 0.015454545454545455, 'f1': 0.029929577464788734}, 'combined': 0.02233550557073786, 'epoch': 16}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 16}
Test Russian: {'template': {'p': 0.961038961038961, 'r': 0.6016260162601627, 'f1': 0.74}, 'slot': {'p': 0.4594594594594595, 'r': 0.015454545454545455, 'f1': 0.029903254177660512}, 'combined': 0.02212840809146878, 'epoch': 16}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 16}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16}
Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5}
Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5}
Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5}
Korean: {'template':
{'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 17 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:12:46.254830: step: 4/77, loss: 0.0003037060087081045 2023-01-22 08:12:47.561304: step: 8/77, loss: 0.00038242657319642603 2023-01-22 08:12:48.894444: step: 12/77, loss: 0.0013528617564588785 2023-01-22 08:12:50.203786: step: 16/77, loss: 0.00010484598897164688 2023-01-22 08:12:51.479891: step: 20/77, loss: 0.004785007331520319 2023-01-22 08:12:52.808345: step: 24/77, loss: 0.00010676060628611594 2023-01-22 08:12:54.047753: step: 28/77, loss: 0.00020604948804248124 2023-01-22 08:12:55.354573: step: 32/77, loss: 0.0016604478005319834 2023-01-22 08:12:56.635888: step: 36/77, loss: 0.0016582023818045855 2023-01-22 08:12:57.938866: step: 40/77, loss: 0.0004796137218363583 2023-01-22 08:12:59.206680: step: 44/77, loss: 0.008982490748167038 2023-01-22 08:13:00.421731: step: 48/77, loss: 0.005666470155119896 2023-01-22 08:13:01.705815: step: 52/77, loss: 0.013005124405026436 2023-01-22 08:13:03.015171: step: 56/77, loss: 0.00043898681178689003 2023-01-22 08:13:04.279135: step: 60/77, loss: 0.010772701352834702 2023-01-22 08:13:05.544737: step: 64/77, loss: 0.014273944310843945 2023-01-22 08:13:06.794042: step: 68/77, loss: 0.009036684408783913 2023-01-22 08:13:08.099376: step: 72/77, loss: 0.0006658356287516654 2023-01-22 08:13:09.402041: step: 76/77, loss: 1.80274473677855e-05 2023-01-22 08:13:10.656535: step: 80/77, loss: 0.00024116590793710202 2023-01-22 08:13:11.948532: step: 84/77, loss: 0.014361845329403877 2023-01-22 08:13:13.232437: step: 88/77, loss: 0.02260422147810459 2023-01-22 08:13:14.497725: step: 92/77, loss: 0.0029067930299788713 2023-01-22 08:13:15.782850: step: 96/77, loss: 0.0024856228847056627 2023-01-22 08:13:17.094527: step: 100/77, loss: 1.2357354535197373e-05 2023-01-22 08:13:18.408485: step: 104/77, loss: 0.0018393194768577814 2023-01-22 08:13:19.710568: step: 108/77, loss: 4.3470787204569206e-05 2023-01-22 08:13:20.989463: step: 112/77, loss: 0.012341336347162724 2023-01-22 08:13:22.288581: step: 116/77, loss: 0.004242464900016785 2023-01-22 08:13:23.599688: step: 120/77, loss: 0.0019005483482033014 2023-01-22 08:13:24.885713: step: 124/77, loss: 8.54067548061721e-06 2023-01-22 08:13:26.155206: step: 128/77, loss: 0.00011833629105240107 2023-01-22 08:13:27.485459: step: 132/77, loss: 0.04586326330900192 2023-01-22 08:13:28.710961: step: 136/77, loss: 0.012901647947728634 2023-01-22 08:13:29.981866: step: 140/77, loss: 0.0001440030027879402 2023-01-22 08:13:31.266249: step: 144/77, loss: 0.00040081614861264825 2023-01-22 08:13:32.614947: 
step: 148/77, loss: 0.02381657063961029 2023-01-22 08:13:33.916858: step: 152/77, loss: 0.0023007721174508333 2023-01-22 08:13:35.236398: step: 156/77, loss: 0.0388503298163414 2023-01-22 08:13:36.539534: step: 160/77, loss: 0.0034463191404938698 2023-01-22 08:13:37.876971: step: 164/77, loss: 0.0004672359791584313 2023-01-22 08:13:39.189878: step: 168/77, loss: 0.0018541706958785653 2023-01-22 08:13:40.483062: step: 172/77, loss: 0.0327320471405983 2023-01-22 08:13:41.757737: step: 176/77, loss: 0.0024690069258213043 2023-01-22 08:13:43.065240: step: 180/77, loss: 0.0010510309366509318 2023-01-22 08:13:44.346902: step: 184/77, loss: 0.0008923964924179018 2023-01-22 08:13:45.641235: step: 188/77, loss: 0.00029543269192799926 2023-01-22 08:13:46.933616: step: 192/77, loss: 0.0028764773160219193 2023-01-22 08:13:48.235869: step: 196/77, loss: 1.1312491551507264e-05 2023-01-22 08:13:49.587440: step: 200/77, loss: 0.009550292044878006 2023-01-22 08:13:50.901351: step: 204/77, loss: 0.0052166227251291275 2023-01-22 08:13:52.205648: step: 208/77, loss: 0.0008433779585175216 2023-01-22 08:13:53.497519: step: 212/77, loss: 2.461548319843132e-06 2023-01-22 08:13:54.813561: step: 216/77, loss: 0.0022772536613047123 2023-01-22 08:13:56.100547: step: 220/77, loss: 0.0036285670939832926 2023-01-22 08:13:57.357040: step: 224/77, loss: 0.032222650945186615 2023-01-22 08:13:58.603930: step: 228/77, loss: 6.528257654281333e-05 2023-01-22 08:13:59.923318: step: 232/77, loss: 0.010111379437148571 2023-01-22 08:14:01.222666: step: 236/77, loss: 0.003799677127972245 2023-01-22 08:14:02.535589: step: 240/77, loss: 0.026927510276436806 2023-01-22 08:14:03.907478: step: 244/77, loss: 0.009405246004462242 2023-01-22 08:14:05.243106: step: 248/77, loss: 0.032634347677230835 2023-01-22 08:14:06.548058: step: 252/77, loss: 0.0018777992809191346 2023-01-22 08:14:07.839408: step: 256/77, loss: 0.06961376965045929 2023-01-22 08:14:09.137864: step: 260/77, loss: 0.015779821202158928 2023-01-22 08:14:10.456265: step: 264/77, loss: 0.0018593231216073036 2023-01-22 08:14:11.748533: step: 268/77, loss: 3.989012475358322e-05 2023-01-22 08:14:13.063700: step: 272/77, loss: 2.904075699916575e-05 2023-01-22 08:14:14.367495: step: 276/77, loss: 1.4655574887001421e-05 2023-01-22 08:14:15.658314: step: 280/77, loss: 0.006797629874199629 2023-01-22 08:14:16.952896: step: 284/77, loss: 0.0001961684611160308 2023-01-22 08:14:18.250767: step: 288/77, loss: 0.04722617566585541 2023-01-22 08:14:19.612511: step: 292/77, loss: 0.0010816743597388268 2023-01-22 08:14:20.928998: step: 296/77, loss: 1.5971452739904635e-05 2023-01-22 08:14:22.203534: step: 300/77, loss: 0.03634996712207794 2023-01-22 08:14:23.504458: step: 304/77, loss: 0.004210877697914839 2023-01-22 08:14:24.890950: step: 308/77, loss: 0.0038256642874330282 2023-01-22 08:14:26.147946: step: 312/77, loss: 0.0038271218072623014 2023-01-22 08:14:27.456330: step: 316/77, loss: 0.052637092769145966 2023-01-22 08:14:28.817692: step: 320/77, loss: 0.0030049553606659174 2023-01-22 08:14:30.176313: step: 324/77, loss: 0.0036819763481616974 2023-01-22 08:14:31.432434: step: 328/77, loss: 0.007940493524074554 2023-01-22 08:14:32.767216: step: 332/77, loss: 0.014108079485595226 2023-01-22 08:14:34.132685: step: 336/77, loss: 0.00021898458362556994 2023-01-22 08:14:35.448158: step: 340/77, loss: 7.66866924095666e-06 2023-01-22 08:14:36.679105: step: 344/77, loss: 0.035794321447610855 2023-01-22 08:14:38.018035: step: 348/77, loss: 0.0010580236557871103 2023-01-22 08:14:39.316191: step: 
352/77, loss: 0.0006705737905576825 2023-01-22 08:14:40.601293: step: 356/77, loss: 8.463625272270292e-05 2023-01-22 08:14:41.868066: step: 360/77, loss: 0.03241058066487312 2023-01-22 08:14:43.139502: step: 364/77, loss: 0.0008318339241668582 2023-01-22 08:14:44.365591: step: 368/77, loss: 0.008623465895652771 2023-01-22 08:14:45.689455: step: 372/77, loss: 0.0006912436801940203 2023-01-22 08:14:47.008442: step: 376/77, loss: 0.007522268686443567 2023-01-22 08:14:48.336906: step: 380/77, loss: 0.010126580484211445 2023-01-22 08:14:49.653076: step: 384/77, loss: 0.00013385726197157055 2023-01-22 08:14:50.956467: step: 388/77, loss: 0.0001453287695767358
==================================================
Loss: 0.008
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17}
Test Chinese: {'template': {'p': 0.9512195121951219, 'r': 0.6341463414634146, 'f1': 0.7609756097560976}, 'slot': {'p': 0.5555555555555556, 'r': 0.022727272727272728, 'f1': 0.04366812227074236}, 'combined': 0.03323037597188199, 'epoch': 17}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17}
Test Korean: {'template': {'p': 0.9512195121951219, 'r': 0.6341463414634146, 'f1': 0.7609756097560976}, 'slot': {'p': 0.5555555555555556, 'r': 0.022727272727272728, 'f1': 0.04366812227074236}, 'combined': 0.03323037597188199, 'epoch': 17}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17}
Test Russian: {'template': {'p': 0.9506172839506173, 'r': 0.6260162601626016, 'f1': 0.7549019607843136}, 'slot': {'p': 0.5681818181818182, 'r': 0.022727272727272728, 'f1': 0.04370629370629371}, 'combined': 0.03299396681749623, 'epoch': 17}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 17}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 17}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5}
Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346,
'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 18 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:16:29.934324: step: 4/77, loss: 0.001075280481018126 2023-01-22 08:16:31.202593: step: 8/77, loss: 0.0016390618402510881 2023-01-22 08:16:32.486331: step: 12/77, loss: 2.4888377083698288e-05 2023-01-22 08:16:33.800438: step: 16/77, loss: 0.00032724355696700513 2023-01-22 08:16:35.063751: step: 20/77, loss: 3.541986006894149e-05 2023-01-22 08:16:36.373238: step: 24/77, loss: 0.012699036858975887 2023-01-22 08:16:37.673548: step: 28/77, loss: 3.851627843687311e-05 2023-01-22 08:16:38.970975: step: 32/77, loss: 0.0511711984872818 2023-01-22 08:16:40.301741: step: 36/77, loss: 0.001221914542838931 2023-01-22 08:16:41.604099: step: 40/77, loss: 0.019967470318078995 2023-01-22 08:16:42.870021: step: 44/77, loss: 0.00016486311506014317 2023-01-22 08:16:44.157197: step: 48/77, loss: 6.0238003243284766e-06 2023-01-22 08:16:45.436291: step: 52/77, loss: 0.003115791827440262 2023-01-22 08:16:46.737602: step: 56/77, loss: 0.0009298291988670826 2023-01-22 08:16:48.049410: step: 60/77, loss: 2.789767131616827e-05 2023-01-22 08:16:49.315693: step: 64/77, loss: 0.025554176419973373 2023-01-22 08:16:50.615647: step: 68/77, loss: 0.056334611028432846 2023-01-22 08:16:51.915882: step: 72/77, loss: 0.0028062222991138697 2023-01-22 08:16:53.279048: step: 76/77, loss: 0.10629882663488388 2023-01-22 08:16:54.605427: step: 80/77, loss: 0.05901770293712616 2023-01-22 08:16:55.887809: step: 84/77, loss: 1.7697377188596874e-05 2023-01-22 08:16:57.204361: step: 88/77, loss: 0.010094331577420235 2023-01-22 08:16:58.479810: step: 92/77, loss: 0.0006838410045020282 2023-01-22 08:16:59.792574: step: 96/77, loss: 3.121390182059258e-05 2023-01-22 08:17:01.085433: step: 100/77, loss: 0.00021746208949480206 2023-01-22 08:17:02.396277: step: 104/77, loss: 0.00015648044063709676 2023-01-22 08:17:03.681688: step: 108/77, loss: 0.007220795378088951 2023-01-22 08:17:04.977694: step: 112/77, loss: 0.006847000680863857 2023-01-22 08:17:06.281525: step: 116/77, loss: 1.8800854377332143e-05 2023-01-22 08:17:07.588201: step: 120/77, loss: 0.000735764333512634 2023-01-22 08:17:08.855165: step: 124/77, loss: 0.0020050792954862118 2023-01-22 08:17:10.194103: step: 128/77, loss: 6.402538929251023e-06 2023-01-22 08:17:11.497056: step: 
132/77, loss: 0.01608336716890335 2023-01-22 08:17:12.804849: step: 136/77, loss: 6.378698162734509e-05 2023-01-22 08:17:14.148975: step: 140/77, loss: 0.0006921316962689161 2023-01-22 08:17:15.412571: step: 144/77, loss: 3.956109139835462e-06 2023-01-22 08:17:16.737440: step: 148/77, loss: 0.008236408233642578 2023-01-22 08:17:18.047451: step: 152/77, loss: 0.003246211213991046 2023-01-22 08:17:19.384623: step: 156/77, loss: 0.04419539496302605 2023-01-22 08:17:20.700750: step: 160/77, loss: 0.0031513397116214037 2023-01-22 08:17:21.996526: step: 164/77, loss: 0.0011053562629967928 2023-01-22 08:17:23.335261: step: 168/77, loss: 0.0018971740501001477 2023-01-22 08:17:24.665708: step: 172/77, loss: 1.8000569980358705e-05 2023-01-22 08:17:25.972046: step: 176/77, loss: 0.0016024464275687933 2023-01-22 08:17:27.233125: step: 180/77, loss: 0.00030853645876049995 2023-01-22 08:17:28.585163: step: 184/77, loss: 0.010142313316464424 2023-01-22 08:17:29.854715: step: 188/77, loss: 0.01915617100894451 2023-01-22 08:17:31.122374: step: 192/77, loss: 0.004522524308413267 2023-01-22 08:17:32.402985: step: 196/77, loss: 0.006432425230741501 2023-01-22 08:17:33.656153: step: 200/77, loss: 0.01698668859899044 2023-01-22 08:17:34.974409: step: 204/77, loss: 0.0007753705722279847 2023-01-22 08:17:36.303615: step: 208/77, loss: 0.003447559429332614 2023-01-22 08:17:37.620448: step: 212/77, loss: 0.07996546477079391 2023-01-22 08:17:38.880472: step: 216/77, loss: 0.0007899499032646418 2023-01-22 08:17:40.179031: step: 220/77, loss: 0.00014828376879449934 2023-01-22 08:17:41.519320: step: 224/77, loss: 0.0031378234270960093 2023-01-22 08:17:42.835174: step: 228/77, loss: 0.0024087419733405113 2023-01-22 08:17:44.160004: step: 232/77, loss: 0.022225063294172287 2023-01-22 08:17:45.472923: step: 236/77, loss: 0.013735632412135601 2023-01-22 08:17:46.759111: step: 240/77, loss: 0.024480065330863 2023-01-22 08:17:48.007596: step: 244/77, loss: 0.0015148434322327375 2023-01-22 08:17:49.299073: step: 248/77, loss: 0.047594137489795685 2023-01-22 08:17:50.624274: step: 252/77, loss: 1.2218940526054212e-07 2023-01-22 08:17:52.009868: step: 256/77, loss: 0.00227353535592556 2023-01-22 08:17:53.305523: step: 260/77, loss: 0.013032825663685799 2023-01-22 08:17:54.592545: step: 264/77, loss: 0.0023532661143690348 2023-01-22 08:17:55.898757: step: 268/77, loss: 1.5874851669650525e-05 2023-01-22 08:17:57.207684: step: 272/77, loss: 3.869067586492747e-05 2023-01-22 08:17:58.523801: step: 276/77, loss: 0.007464665919542313 2023-01-22 08:17:59.820959: step: 280/77, loss: 7.692573490203358e-06 2023-01-22 08:18:01.113608: step: 284/77, loss: 0.0008798784692771733 2023-01-22 08:18:02.399833: step: 288/77, loss: 0.0005066373851150274 2023-01-22 08:18:03.670086: step: 292/77, loss: 0.0033748496789485216 2023-01-22 08:18:04.962256: step: 296/77, loss: 0.03423455357551575 2023-01-22 08:18:06.259567: step: 300/77, loss: 0.0004929510178044438 2023-01-22 08:18:07.605620: step: 304/77, loss: 0.30837780237197876 2023-01-22 08:18:08.935388: step: 308/77, loss: 7.889981498010457e-05 2023-01-22 08:18:10.262130: step: 312/77, loss: 0.05586982145905495 2023-01-22 08:18:11.589991: step: 316/77, loss: 0.007840905338525772 2023-01-22 08:18:12.878717: step: 320/77, loss: 0.0010584781412035227 2023-01-22 08:18:14.170146: step: 324/77, loss: 0.0004989710869267583 2023-01-22 08:18:15.467296: step: 328/77, loss: 0.00011972568609053269 2023-01-22 08:18:16.755428: step: 332/77, loss: 0.0002813867758959532 2023-01-22 08:18:18.074463: step: 336/77, 
loss: 0.008618929423391819 2023-01-22 08:18:19.443242: step: 340/77, loss: 1.6787849745014682e-05 2023-01-22 08:18:20.741598: step: 344/77, loss: 0.0036653918214142323 2023-01-22 08:18:22.094514: step: 348/77, loss: 0.0006011840887367725 2023-01-22 08:18:23.420978: step: 352/77, loss: 7.024506885500159e-06 2023-01-22 08:18:24.731323: step: 356/77, loss: 9.986665827454999e-05 2023-01-22 08:18:26.032302: step: 360/77, loss: 4.4611333578359336e-06 2023-01-22 08:18:27.370752: step: 364/77, loss: 0.001176069607026875 2023-01-22 08:18:28.659737: step: 368/77, loss: 3.282143370597623e-05 2023-01-22 08:18:29.941161: step: 372/77, loss: 0.002442543162032962 2023-01-22 08:18:31.260189: step: 376/77, loss: 0.04803265258669853 2023-01-22 08:18:32.571047: step: 380/77, loss: 0.0002765162498690188 2023-01-22 08:18:33.852035: step: 384/77, loss: 0.007188045885413885 2023-01-22 08:18:35.140198: step: 388/77, loss: 0.02457827515900135 ================================================== Loss: 0.013 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Chinese: {'template': {'p': 0.9625, 'r': 0.6260162601626016, 'f1': 0.7586206896551725}, 'slot': {'p': 0.49056603773584906, 'r': 0.023636363636363636, 'f1': 0.0450997398091934}, 'combined': 0.034213595717319134, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Korean: {'template': {'p': 0.9625, 'r': 0.6260162601626016, 'f1': 0.7586206896551725}, 'slot': {'p': 0.48148148148148145, 'r': 0.023636363636363636, 'f1': 0.045060658578856154}, 'combined': 0.03418394788740812, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Russian: {'template': {'p': 0.9629629629629629, 'r': 0.6341463414634146, 'f1': 0.7647058823529412}, 'slot': {'p': 0.48148148148148145, 'r': 0.023636363636363636, 'f1': 0.045060658578856154}, 'combined': 0.03445815067794883, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for 
Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 19 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:20:14.454646: step: 4/77, loss: 0.002298800041899085 2023-01-22 08:20:15.725508: step: 8/77, loss: 0.0049286182038486 2023-01-22 08:20:17.053111: step: 12/77, loss: 0.0019144328543916345 2023-01-22 08:20:18.337715: step: 16/77, loss: 0.00031539713381789625 2023-01-22 08:20:19.649183: step: 20/77, loss: 0.0002348479174543172 2023-01-22 08:20:20.950921: step: 24/77, loss: 0.00013729455531574786 2023-01-22 08:20:22.262986: step: 28/77, loss: 6.911471427883953e-05 2023-01-22 08:20:23.554297: step: 32/77, loss: 0.02153841033577919 2023-01-22 08:20:24.848205: step: 36/77, loss: 9.762252739164978e-05 2023-01-22 08:20:26.142947: step: 40/77, loss: 0.003212488256394863 2023-01-22 08:20:27.469872: step: 44/77, loss: 0.003010992892086506 2023-01-22 08:20:28.825752: step: 48/77, loss: 0.0010896417079493403 2023-01-22 08:20:30.128342: step: 52/77, loss: 0.028930403292179108 2023-01-22 08:20:31.483398: step: 56/77, loss: 4.31520129495766e-06 2023-01-22 08:20:32.773170: step: 60/77, loss: 0.012675435282289982 2023-01-22 08:20:34.087651: step: 64/77, loss: 0.0007141504902392626 2023-01-22 08:20:35.384136: step: 68/77, loss: 0.0005412647151388228 2023-01-22 08:20:36.670521: step: 72/77, loss: 4.95978338221903e-06 2023-01-22 08:20:37.932826: step: 76/77, loss: 0.0003975495637860149 2023-01-22 08:20:39.250222: step: 80/77, loss: 0.0002749481936916709 2023-01-22 08:20:40.470143: step: 84/77, loss: 0.016873259097337723 2023-01-22 08:20:41.742179: step: 88/77, loss: 0.030899502336978912 2023-01-22 08:20:43.099276: step: 92/77, loss: 0.003275349037721753 2023-01-22 08:20:44.435945: step: 96/77, loss: 0.047433532774448395 2023-01-22 08:20:45.736303: step: 100/77, loss: 0.0008755750604905188 2023-01-22 08:20:47.053213: step: 104/77, loss: 6.030965960235335e-05 2023-01-22 08:20:48.295816: step: 108/77, loss: 0.043235622346401215 2023-01-22 08:20:49.565332: step: 112/77, loss: 0.01793503761291504 2023-01-22 08:20:50.823897: step: 116/77, loss: 0.0010350135853514075 2023-01-22 08:20:52.114651: step: 120/77, loss: 
2.9047590942354873e-05 2023-01-22 08:20:53.415439: step: 124/77, loss: 5.7926597946789116e-05 2023-01-22 08:20:54.660246: step: 128/77, loss: 9.975417924579233e-05 2023-01-22 08:20:55.940014: step: 132/77, loss: 0.016122905537486076 2023-01-22 08:20:57.229370: step: 136/77, loss: 0.012848117388784885 2023-01-22 08:20:58.531096: step: 140/77, loss: 0.00012284377589821815 2023-01-22 08:20:59.795246: step: 144/77, loss: 0.00014236402057576925 2023-01-22 08:21:01.057293: step: 148/77, loss: 0.03676806390285492 2023-01-22 08:21:02.359109: step: 152/77, loss: 0.03311936557292938 2023-01-22 08:21:03.678269: step: 156/77, loss: 3.850349457934499e-05 2023-01-22 08:21:04.975562: step: 160/77, loss: 0.0006413233932107687 2023-01-22 08:21:06.288786: step: 164/77, loss: 0.0027559632435441017 2023-01-22 08:21:07.558274: step: 168/77, loss: 0.0016262399731203914 2023-01-22 08:21:08.856067: step: 172/77, loss: 0.0035593020729720592 2023-01-22 08:21:10.135669: step: 176/77, loss: 0.00036346568958833814 2023-01-22 08:21:11.434916: step: 180/77, loss: 0.0005653153057210147 2023-01-22 08:21:12.734945: step: 184/77, loss: 2.0861619987044833e-08 2023-01-22 08:21:14.055971: step: 188/77, loss: 0.00012467730266507715 2023-01-22 08:21:15.356135: step: 192/77, loss: 0.001481741899624467 2023-01-22 08:21:16.713580: step: 196/77, loss: 6.3034076447365806e-06 2023-01-22 08:21:17.999709: step: 200/77, loss: 0.00010196158837061375 2023-01-22 08:21:19.301815: step: 204/77, loss: 0.0004513350431807339 2023-01-22 08:21:20.624422: step: 208/77, loss: 0.005133800208568573 2023-01-22 08:21:21.934853: step: 212/77, loss: 0.00018107425421476364 2023-01-22 08:21:23.259400: step: 216/77, loss: 2.1516948436328676e-06 2023-01-22 08:21:24.507527: step: 220/77, loss: 0.009150481782853603 2023-01-22 08:21:25.801389: step: 224/77, loss: 0.000125797902001068 2023-01-22 08:21:27.129244: step: 228/77, loss: 0.0288016926497221 2023-01-22 08:21:28.419002: step: 232/77, loss: 7.740493310848251e-05 2023-01-22 08:21:29.668701: step: 236/77, loss: 0.012049062177538872 2023-01-22 08:21:30.947916: step: 240/77, loss: 0.0032380330376327038 2023-01-22 08:21:32.248485: step: 244/77, loss: 6.329387815640075e-06 2023-01-22 08:21:33.584234: step: 248/77, loss: 0.018659718334674835 2023-01-22 08:21:34.881736: step: 252/77, loss: 0.00016582694661337882 2023-01-22 08:21:36.181681: step: 256/77, loss: 1.3560040201809898e-07 2023-01-22 08:21:37.500975: step: 260/77, loss: 0.04998790845274925 2023-01-22 08:21:38.838137: step: 264/77, loss: 5.789184069726616e-05 2023-01-22 08:21:40.123808: step: 268/77, loss: 9.110860992223024e-05 2023-01-22 08:21:41.400442: step: 272/77, loss: 0.01263953372836113 2023-01-22 08:21:42.630637: step: 276/77, loss: 0.0018562874756753445 2023-01-22 08:21:43.975567: step: 280/77, loss: 7.022190402494743e-05 2023-01-22 08:21:45.203082: step: 284/77, loss: 0.026238013058900833 2023-01-22 08:21:46.513371: step: 288/77, loss: 0.009614682756364346 2023-01-22 08:21:47.811761: step: 292/77, loss: 1.8372948034084402e-06 2023-01-22 08:21:49.121094: step: 296/77, loss: 0.01663910411298275 2023-01-22 08:21:50.465678: step: 300/77, loss: 0.02676456980407238 2023-01-22 08:21:51.755611: step: 304/77, loss: 2.913009438998415e-06 2023-01-22 08:21:53.016195: step: 308/77, loss: 0.0046882545575499535 2023-01-22 08:21:54.318077: step: 312/77, loss: 6.624018715228885e-05 2023-01-22 08:21:55.612527: step: 316/77, loss: 1.5274457837222144e-05 2023-01-22 08:21:56.954024: step: 320/77, loss: 0.03436944633722305 2023-01-22 08:21:58.210053: step: 324/77, 
loss: 0.00013097852934151888 2023-01-22 08:21:59.545889: step: 328/77, loss: 0.0005952545907348394 2023-01-22 08:22:00.894788: step: 332/77, loss: 0.05988472327589989 2023-01-22 08:22:02.206664: step: 336/77, loss: 0.00022210535826161504 2023-01-22 08:22:03.542346: step: 340/77, loss: 4.118560354982037e-06 2023-01-22 08:22:04.834665: step: 344/77, loss: 0.021829169243574142 2023-01-22 08:22:06.171823: step: 348/77, loss: 9.185371163766831e-05 2023-01-22 08:22:07.510120: step: 352/77, loss: 9.739038068801165e-05 2023-01-22 08:22:08.823461: step: 356/77, loss: 0.005260576028376818 2023-01-22 08:22:10.125248: step: 360/77, loss: 0.003987782634794712 2023-01-22 08:22:11.445481: step: 364/77, loss: 0.00033517484553158283 2023-01-22 08:22:12.740157: step: 368/77, loss: 7.898044714238495e-05 2023-01-22 08:22:14.111103: step: 372/77, loss: 0.05937068909406662 2023-01-22 08:22:15.420859: step: 376/77, loss: 0.015781676396727562 2023-01-22 08:22:16.666378: step: 380/77, loss: 0.00015802726557012647 2023-01-22 08:22:17.978450: step: 384/77, loss: 0.021459216251969337 2023-01-22 08:22:19.275339: step: 388/77, loss: 0.0018189732218161225 ================================================== Loss: 0.008 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 19} Test Chinese: {'template': {'p': 0.9594594594594594, 'r': 0.5772357723577236, 'f1': 0.7208121827411168}, 'slot': {'p': 0.48, 'r': 0.02181818181818182, 'f1': 0.041739130434782605}, 'combined': 0.03008607371441183, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 19} Test Korean: {'template': {'p': 0.96, 'r': 0.5853658536585366, 'f1': 0.7272727272727272}, 'slot': {'p': 0.48, 'r': 0.02181818181818182, 'f1': 0.041739130434782605}, 'combined': 0.030355731225296435, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 19} Test Russian: {'template': {'p': 0.96, 'r': 0.5853658536585366, 'f1': 0.7272727272727272}, 'slot': {'p': 0.48, 'r': 0.02181818181818182, 'f1': 0.041739130434782605}, 'combined': 0.030355731225296435, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: 
{'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 20 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:23:58.160784: step: 4/77, loss: 0.00020769896218553185 2023-01-22 08:23:59.500618: step: 8/77, loss: 7.160383393056691e-05 2023-01-22 08:24:00.810239: step: 12/77, loss: 3.1052193207870005e-06 2023-01-22 08:24:02.076596: step: 16/77, loss: 0.0009628063999116421 2023-01-22 08:24:03.398336: step: 20/77, loss: 0.0011009183945134282 2023-01-22 08:24:04.709593: step: 24/77, loss: 0.0008464275742881 2023-01-22 08:24:06.023258: step: 28/77, loss: 0.00027758374926634133 2023-01-22 08:24:07.295335: step: 32/77, loss: 0.007552329450845718 2023-01-22 08:24:08.562839: step: 36/77, loss: 2.311597745574545e-05 2023-01-22 08:24:09.838146: step: 40/77, loss: 0.00032255385303869843 2023-01-22 08:24:11.116389: step: 44/77, loss: 0.00015895852993708104 2023-01-22 08:24:12.429090: step: 48/77, loss: 0.01976187527179718 2023-01-22 08:24:13.708446: step: 52/77, loss: 0.0017516795778647065 2023-01-22 08:24:15.017282: step: 56/77, loss: 2.9258440918056294e-05 2023-01-22 08:24:16.320836: step: 60/77, loss: 5.6702177971601486e-05 2023-01-22 08:24:17.643136: step: 64/77, loss: 0.004498021677136421 2023-01-22 08:24:18.887231: step: 68/77, loss: 3.304888014099561e-05 2023-01-22 08:24:20.179939: step: 72/77, loss: 0.00022221094695851207 2023-01-22 08:24:21.505307: step: 76/77, loss: 0.00012549127859529108 2023-01-22 08:24:22.805256: step: 80/77, loss: 9.402490945831232e-07 2023-01-22 08:24:24.103233: step: 84/77, loss: 0.0007180677494034171 2023-01-22 08:24:25.326304: step: 88/77, loss: 0.00019054979202337563 2023-01-22 08:24:26.588407: step: 92/77, loss: 0.0010646095033735037 2023-01-22 08:24:27.865359: step: 96/77, loss: 0.0002717445604503155 2023-01-22 08:24:29.118621: step: 100/77, loss: 0.004205161239951849 2023-01-22 08:24:30.419211: step: 104/77, loss: 0.0046676695346832275 2023-01-22 08:24:31.700059: step: 108/77, loss: 
0.00028675587964244187 2023-01-22 08:24:33.063073: step: 112/77, loss: 0.0002091683418257162 2023-01-22 08:24:34.363156: step: 116/77, loss: 0.0005976128159090877 2023-01-22 08:24:35.661830: step: 120/77, loss: 0.04944419860839844 2023-01-22 08:24:36.950619: step: 124/77, loss: 7.888769687269814e-06 2023-01-22 08:24:38.240494: step: 128/77, loss: 2.536001557018608e-05 2023-01-22 08:24:39.496747: step: 132/77, loss: 1.2457341654226184e-05 2023-01-22 08:24:40.830754: step: 136/77, loss: 4.223461291985586e-05 2023-01-22 08:24:42.164073: step: 140/77, loss: 0.0043779960833489895 2023-01-22 08:24:43.470117: step: 144/77, loss: 0.00011629718210315332 2023-01-22 08:24:44.802329: step: 148/77, loss: 0.011396033689379692 2023-01-22 08:24:46.115757: step: 152/77, loss: 0.0025481493212282658 2023-01-22 08:24:47.410180: step: 156/77, loss: 0.013005420565605164 2023-01-22 08:24:48.660608: step: 160/77, loss: 0.07505260407924652 2023-01-22 08:24:49.974515: step: 164/77, loss: 0.00014636758714914322 2023-01-22 08:24:51.295775: step: 168/77, loss: 0.0018843450816348195 2023-01-22 08:24:52.592080: step: 172/77, loss: 0.007612647954374552 2023-01-22 08:24:53.895081: step: 176/77, loss: 0.0030222514178603888 2023-01-22 08:24:55.212886: step: 180/77, loss: 9.826284076552838e-05 2023-01-22 08:24:56.544102: step: 184/77, loss: 0.0009221957880072296 2023-01-22 08:24:57.836469: step: 188/77, loss: 0.007369095925241709 2023-01-22 08:24:59.100860: step: 192/77, loss: 5.647513603435073e-07 2023-01-22 08:25:00.401479: step: 196/77, loss: 0.003735194681212306 2023-01-22 08:25:01.686308: step: 200/77, loss: 0.0003531751863192767 2023-01-22 08:25:03.024225: step: 204/77, loss: 0.00419339956715703 2023-01-22 08:25:04.324372: step: 208/77, loss: 9.134208085015416e-05 2023-01-22 08:25:05.695232: step: 212/77, loss: 7.7279910328798e-05 2023-01-22 08:25:07.006995: step: 216/77, loss: 1.5623803847120143e-05 2023-01-22 08:25:08.307644: step: 220/77, loss: 0.002031370997428894 2023-01-22 08:25:09.585500: step: 224/77, loss: 0.03100615181028843 2023-01-22 08:25:10.869472: step: 228/77, loss: 0.0006410797941498458 2023-01-22 08:25:12.202949: step: 232/77, loss: 3.231957407479058e-06 2023-01-22 08:25:13.511646: step: 236/77, loss: 4.3285936044412665e-06 2023-01-22 08:25:14.835215: step: 240/77, loss: 0.010497227311134338 2023-01-22 08:25:16.206679: step: 244/77, loss: 0.00025311787612736225 2023-01-22 08:25:17.571314: step: 248/77, loss: 0.01094940584152937 2023-01-22 08:25:18.871976: step: 252/77, loss: 0.005903997924178839 2023-01-22 08:25:20.191281: step: 256/77, loss: 0.0039853062480688095 2023-01-22 08:25:21.509613: step: 260/77, loss: 0.008176511153578758 2023-01-22 08:25:22.809669: step: 264/77, loss: 0.009724855422973633 2023-01-22 08:25:24.108379: step: 268/77, loss: 0.005896150600165129 2023-01-22 08:25:25.430776: step: 272/77, loss: 0.025987211614847183 2023-01-22 08:25:26.737277: step: 276/77, loss: 1.1424914191593416e-05 2023-01-22 08:25:28.045921: step: 280/77, loss: 0.00039195033605210483 2023-01-22 08:25:29.348077: step: 284/77, loss: 0.0040579866617918015 2023-01-22 08:25:30.649165: step: 288/77, loss: 4.544458988675615e-06 2023-01-22 08:25:31.928666: step: 292/77, loss: 9.509296796750277e-05 2023-01-22 08:25:33.223806: step: 296/77, loss: 0.08171162754297256 2023-01-22 08:25:34.505256: step: 300/77, loss: 2.0242150640115142e-05 2023-01-22 08:25:35.767048: step: 304/77, loss: 0.0001572638429934159 2023-01-22 08:25:37.067174: step: 308/77, loss: 0.002182783093303442 2023-01-22 08:25:38.376852: step: 312/77, loss: 
0.00037773323128931224 2023-01-22 08:25:39.715062: step: 316/77, loss: 0.0036809672601521015 2023-01-22 08:25:40.987872: step: 320/77, loss: 0.002347944537177682 2023-01-22 08:25:42.338216: step: 324/77, loss: 0.05394945666193962 2023-01-22 08:25:43.648059: step: 328/77, loss: 0.0023451726883649826 2023-01-22 08:25:44.958990: step: 332/77, loss: 0.01851961389183998 2023-01-22 08:25:46.268867: step: 336/77, loss: 4.4434455048758537e-05 2023-01-22 08:25:47.632901: step: 340/77, loss: 0.006946150679141283 2023-01-22 08:25:48.951685: step: 344/77, loss: 0.01549392007291317 2023-01-22 08:25:50.288041: step: 348/77, loss: 4.792132676811889e-05 2023-01-22 08:25:51.580796: step: 352/77, loss: 4.237364009895828e-06 2023-01-22 08:25:52.886607: step: 356/77, loss: 1.0000697329815011e-05 2023-01-22 08:25:54.190362: step: 360/77, loss: 0.00021215454034972936 2023-01-22 08:25:55.490091: step: 364/77, loss: 0.029700905084609985 2023-01-22 08:25:56.787236: step: 368/77, loss: 6.0535454394994304e-05 2023-01-22 08:25:58.107031: step: 372/77, loss: 0.000546629133168608 2023-01-22 08:25:59.406640: step: 376/77, loss: 0.0011142148869112134 2023-01-22 08:26:00.691524: step: 380/77, loss: 0.003580347867682576 2023-01-22 08:26:01.967098: step: 384/77, loss: 0.0002462729753460735 2023-01-22 08:26:03.268830: step: 388/77, loss: 4.011456621810794e-05 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Chinese: {'template': {'p': 0.9487179487179487, 'r': 0.6016260162601627, 'f1': 0.736318407960199}, 'slot': {'p': 0.4375, 'r': 0.019090909090909092, 'f1': 0.036585365853658534}, 'combined': 0.026938478340007278, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Korean: {'template': {'p': 0.9487179487179487, 'r': 0.6016260162601627, 'f1': 0.736318407960199}, 'slot': {'p': 0.4583333333333333, 'r': 0.02, 'f1': 0.03832752613240418}, 'combined': 0.02822126302286477, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Russian: {'template': {'p': 0.9487179487179487, 'r': 0.6016260162601627, 'f1': 0.736318407960199}, 'slot': {'p': 0.4666666666666667, 'r': 0.019090909090909092, 'f1': 0.036681222707423584}, 'combined': 0.027009059505963634, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 
0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 21 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:27:41.820174: step: 4/77, loss: 0.03239181637763977 2023-01-22 08:27:43.090179: step: 8/77, loss: 0.017717987298965454 2023-01-22 08:27:44.428521: step: 12/77, loss: 6.446881343435962e-06 2023-01-22 08:27:45.713198: step: 16/77, loss: 0.008411260321736336 2023-01-22 08:27:46.976505: step: 20/77, loss: 3.678425491671078e-05 2023-01-22 08:27:48.300571: step: 24/77, loss: 7.518615802837303e-06 2023-01-22 08:27:49.604251: step: 28/77, loss: 8.049477764870971e-05 2023-01-22 08:27:50.913986: step: 32/77, loss: 0.00012354887439869344 2023-01-22 08:27:52.174996: step: 36/77, loss: 1.3187103604650474e-06 2023-01-22 08:27:53.458230: step: 40/77, loss: 8.020484528969973e-05 2023-01-22 08:27:54.749707: step: 44/77, loss: 0.0003721020766533911 2023-01-22 08:27:56.105427: step: 48/77, loss: 4.947678462485783e-05 2023-01-22 08:27:57.408802: step: 52/77, loss: 0.006109760142862797 2023-01-22 08:27:58.720056: step: 56/77, loss: 0.03650211915373802 2023-01-22 08:28:00.002687: step: 60/77, loss: 1.3884582585887983e-05 2023-01-22 08:28:01.262780: step: 64/77, loss: 2.6498164515942335e-05 2023-01-22 08:28:02.505471: step: 68/77, loss: 0.0009089701343327761 2023-01-22 08:28:03.811059: step: 72/77, loss: 0.000358206540113315 2023-01-22 08:28:05.107746: step: 76/77, loss: 0.028453640639781952 2023-01-22 08:28:06.430718: step: 80/77, loss: 0.0003351859631948173 2023-01-22 08:28:07.713773: step: 84/77, loss: 4.7906294639687985e-05 2023-01-22 08:28:09.035625: step: 88/77, loss: 0.00013332579692360014 2023-01-22 08:28:10.364934: step: 92/77, loss: 0.0007689341437071562 2023-01-22 08:28:11.645218: step: 96/77, loss: 
0.0031823881436139345 2023-01-22 08:28:12.991356: step: 100/77, loss: 0.00011390875442884862 2023-01-22 08:28:14.319595: step: 104/77, loss: 0.0002913151402026415 2023-01-22 08:28:15.555332: step: 108/77, loss: 8.261164111900143e-06 2023-01-22 08:28:16.856679: step: 112/77, loss: 5.380424227041658e-06 2023-01-22 08:28:18.170756: step: 116/77, loss: 0.005360376555472612 2023-01-22 08:28:19.502018: step: 120/77, loss: 2.903307176893577e-05 2023-01-22 08:28:20.820768: step: 124/77, loss: 2.8209698939463124e-05 2023-01-22 08:28:22.132699: step: 128/77, loss: 4.687665568781085e-05 2023-01-22 08:28:23.446997: step: 132/77, loss: 0.0002770746359601617 2023-01-22 08:28:24.731661: step: 136/77, loss: 0.00012425713066477329 2023-01-22 08:28:26.024359: step: 140/77, loss: 0.00012386722664814442 2023-01-22 08:28:27.281016: step: 144/77, loss: 0.04620302841067314 2023-01-22 08:28:28.570198: step: 148/77, loss: 0.023962827399373055 2023-01-22 08:28:29.910714: step: 152/77, loss: 1.7255728380405344e-05 2023-01-22 08:28:31.203390: step: 156/77, loss: 0.0002274275029776618 2023-01-22 08:28:32.461908: step: 160/77, loss: 1.3703098375117406e-05 2023-01-22 08:28:33.803981: step: 164/77, loss: 4.8758891352918e-05 2023-01-22 08:28:35.117221: step: 168/77, loss: 0.009159489534795284 2023-01-22 08:28:36.395814: step: 172/77, loss: 0.0037564877420663834 2023-01-22 08:28:37.704520: step: 176/77, loss: 0.0002380924706812948 2023-01-22 08:28:38.956850: step: 180/77, loss: 5.755915935878875e-06 2023-01-22 08:28:40.255779: step: 184/77, loss: 5.0621150876395404e-05 2023-01-22 08:28:41.545691: step: 188/77, loss: 2.6403948140796274e-05 2023-01-22 08:28:42.825608: step: 192/77, loss: 0.00014365346578415483 2023-01-22 08:28:44.130365: step: 196/77, loss: 0.002432482549920678 2023-01-22 08:28:45.408770: step: 200/77, loss: 4.91056744067464e-05 2023-01-22 08:28:46.714271: step: 204/77, loss: 0.0019472897984087467 2023-01-22 08:28:48.023030: step: 208/77, loss: 0.049116019159555435 2023-01-22 08:28:49.309740: step: 212/77, loss: 0.00023824565869290382 2023-01-22 08:28:50.570456: step: 216/77, loss: 0.0004655311640817672 2023-01-22 08:28:51.863355: step: 220/77, loss: 5.9485246310941875e-05 2023-01-22 08:28:53.156814: step: 224/77, loss: 0.003002239391207695 2023-01-22 08:28:54.453681: step: 228/77, loss: 0.0006095260032452643 2023-01-22 08:28:55.728052: step: 232/77, loss: 0.027523092925548553 2023-01-22 08:28:57.035858: step: 236/77, loss: 0.13839784264564514 2023-01-22 08:28:58.353238: step: 240/77, loss: 0.0003813329676631838 2023-01-22 08:28:59.668927: step: 244/77, loss: 0.0010819652816280723 2023-01-22 08:29:01.013130: step: 248/77, loss: 0.0006821186398155987 2023-01-22 08:29:02.251518: step: 252/77, loss: 1.6465527323816787e-06 2023-01-22 08:29:03.556245: step: 256/77, loss: 6.989516805333551e-06 2023-01-22 08:29:04.858456: step: 260/77, loss: 0.0004569220182020217 2023-01-22 08:29:06.140343: step: 264/77, loss: 2.5564697352820076e-05 2023-01-22 08:29:07.423081: step: 268/77, loss: 0.0005146386101841927 2023-01-22 08:29:08.717176: step: 272/77, loss: 0.00024345022393390536 2023-01-22 08:29:10.017379: step: 276/77, loss: 2.1641742932843044e-05 2023-01-22 08:29:11.303105: step: 280/77, loss: 5.6872628192650154e-05 2023-01-22 08:29:12.630105: step: 284/77, loss: 0.004902479238808155 2023-01-22 08:29:13.932466: step: 288/77, loss: 8.766914834268391e-05 2023-01-22 08:29:15.202647: step: 292/77, loss: 2.6567552140477346e-06 2023-01-22 08:29:16.507659: step: 296/77, loss: 0.026077698916196823 2023-01-22 08:29:17.794849: 
step: 300/77, loss: 0.0014514094218611717 2023-01-22 08:29:19.087643: step: 304/77, loss: 4.962047341905418e-07 2023-01-22 08:29:20.425948: step: 308/77, loss: 4.67259633296635e-06 2023-01-22 08:29:21.696480: step: 312/77, loss: 0.026495279744267464 2023-01-22 08:29:23.024662: step: 316/77, loss: 0.0038940461818128824 2023-01-22 08:29:24.295320: step: 320/77, loss: 0.029748141765594482 2023-01-22 08:29:25.610390: step: 324/77, loss: 0.00021692071459256113 2023-01-22 08:29:26.920358: step: 328/77, loss: 0.004932933486998081 2023-01-22 08:29:28.173246: step: 332/77, loss: 0.00018782397091854364 2023-01-22 08:29:29.453971: step: 336/77, loss: 0.0314268097281456 2023-01-22 08:29:30.785275: step: 340/77, loss: 9.739911183714867e-05 2023-01-22 08:29:32.076320: step: 344/77, loss: 0.0005904044955968857 2023-01-22 08:29:33.367962: step: 348/77, loss: 0.005018756724894047 2023-01-22 08:29:34.719829: step: 352/77, loss: 0.0028589535504579544 2023-01-22 08:29:36.022894: step: 356/77, loss: 0.13345322012901306 2023-01-22 08:29:37.343376: step: 360/77, loss: 0.005826289765536785 2023-01-22 08:29:38.626486: step: 364/77, loss: 0.000740259129088372 2023-01-22 08:29:39.870253: step: 368/77, loss: 0.010807367041707039 2023-01-22 08:29:41.159450: step: 372/77, loss: 0.049246374517679214 2023-01-22 08:29:42.444308: step: 376/77, loss: 2.707458406803198e-06 2023-01-22 08:29:43.788834: step: 380/77, loss: 0.0026401567738503218 2023-01-22 08:29:45.093592: step: 384/77, loss: 0.01615547016263008 2023-01-22 08:29:46.438618: step: 388/77, loss: 3.602041761041619e-05 ================================================== Loss: 0.008 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Chinese: {'template': {'p': 0.9342105263157895, 'r': 0.5772357723577236, 'f1': 0.71356783919598}, 'slot': {'p': 0.44, 'r': 0.02, 'f1': 0.03826086956521739}, 'combined': 0.027301726021411406, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Korean: {'template': {'p': 0.9342105263157895, 'r': 0.5772357723577236, 'f1': 0.71356783919598}, 'slot': {'p': 0.44, 'r': 0.02, 'f1': 0.03826086956521739}, 'combined': 0.027301726021411406, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Russian: {'template': {'p': 0.9342105263157895, 'r': 0.5772357723577236, 'f1': 0.71356783919598}, 'slot': {'p': 0.4489795918367347, 'r': 0.02, 'f1': 0.038294168842471714}, 'combined': 0.02732548731472856, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 
0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 22 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:31:24.789539: step: 4/77, loss: 0.0007809511735104024 2023-01-22 08:31:26.098953: step: 8/77, loss: 0.001551383757032454 2023-01-22 08:31:27.385089: step: 12/77, loss: 0.007308583240956068 2023-01-22 08:31:28.660519: step: 16/77, loss: 2.1040819774498232e-05 2023-01-22 08:31:29.925877: step: 20/77, loss: 0.0037530087865889072 2023-01-22 08:31:31.246946: step: 24/77, loss: 0.0008601925801485777 2023-01-22 08:31:32.540521: step: 28/77, loss: 9.703524119686335e-05 2023-01-22 08:31:33.843172: step: 32/77, loss: 0.00015865601017139852 2023-01-22 08:31:35.128032: step: 36/77, loss: 0.00012202590005472302 2023-01-22 08:31:36.423188: step: 40/77, loss: 0.011112484149634838 2023-01-22 08:31:37.695266: step: 44/77, loss: 0.000339729362167418 2023-01-22 08:31:38.935714: step: 48/77, loss: 0.0005652804975397885 2023-01-22 08:31:40.216519: step: 52/77, loss: 0.003238065168261528 2023-01-22 08:31:41.503813: step: 56/77, loss: 4.828845339943655e-05 2023-01-22 08:31:42.762974: step: 60/77, loss: 0.003508348250761628 2023-01-22 08:31:44.107915: step: 64/77, loss: 8.385243745578919e-06 2023-01-22 08:31:45.438837: step: 68/77, loss: 0.035650648176670074 2023-01-22 08:31:46.722671: step: 72/77, loss: 0.037739142775535583 2023-01-22 08:31:48.036559: step: 76/77, loss: 0.0001413650024915114 2023-01-22 08:31:49.313400: step: 80/77, loss: 0.0006701253587380052 2023-01-22 08:31:50.586069: step: 84/77, loss: 0.0020222077146172523 2023-01-22 
08:31:51.916511: step: 88/77, loss: 0.0008589206263422966 2023-01-22 08:31:53.206854: step: 92/77, loss: 0.0011655199341475964 2023-01-22 08:31:54.473672: step: 96/77, loss: 0.0014887371798977256 2023-01-22 08:31:55.748921: step: 100/77, loss: 0.001581578399054706 2023-01-22 08:31:57.020972: step: 104/77, loss: 0.00011187476047780365 2023-01-22 08:31:58.314904: step: 108/77, loss: 0.00048642870387993753 2023-01-22 08:31:59.578525: step: 112/77, loss: 0.000898391823284328 2023-01-22 08:32:00.903233: step: 116/77, loss: 8.436571988568176e-06 2023-01-22 08:32:02.196282: step: 120/77, loss: 0.02246893011033535 2023-01-22 08:32:03.509959: step: 124/77, loss: 0.001038522575981915 2023-01-22 08:32:04.779547: step: 128/77, loss: 3.281307726865634e-05 2023-01-22 08:32:06.054512: step: 132/77, loss: 0.003178850281983614 2023-01-22 08:32:07.351254: step: 136/77, loss: 1.7324404325336218e-05 2023-01-22 08:32:08.666770: step: 140/77, loss: 0.0005918988026678562 2023-01-22 08:32:09.945460: step: 144/77, loss: 2.3243248506332748e-05 2023-01-22 08:32:11.200660: step: 148/77, loss: 0.0066335974261164665 2023-01-22 08:32:12.491718: step: 152/77, loss: 0.00011396820627851412 2023-01-22 08:32:13.763013: step: 156/77, loss: 0.03955855965614319 2023-01-22 08:32:15.076613: step: 160/77, loss: 9.686138218967244e-05 2023-01-22 08:32:16.388720: step: 164/77, loss: 0.021154014393687248 2023-01-22 08:32:17.697153: step: 168/77, loss: 0.00011484503920655698 2023-01-22 08:32:19.044723: step: 172/77, loss: 0.0008551405044272542 2023-01-22 08:32:20.400660: step: 176/77, loss: 0.02964947558939457 2023-01-22 08:32:21.717300: step: 180/77, loss: 0.0017165419412776828 2023-01-22 08:32:23.053873: step: 184/77, loss: 0.004964523948729038 2023-01-22 08:32:24.371061: step: 188/77, loss: 0.002340970328077674 2023-01-22 08:32:25.714520: step: 192/77, loss: 0.0009994141291826963 2023-01-22 08:32:27.027842: step: 196/77, loss: 0.005752595141530037 2023-01-22 08:32:28.261394: step: 200/77, loss: 4.916898251394741e-05 2023-01-22 08:32:29.562547: step: 204/77, loss: 0.0013054257724434137 2023-01-22 08:32:30.849049: step: 208/77, loss: 5.486142617883161e-05 2023-01-22 08:32:32.139313: step: 212/77, loss: 0.00024407217279076576 2023-01-22 08:32:33.474177: step: 216/77, loss: 1.1915855793631636e-05 2023-01-22 08:32:34.800315: step: 220/77, loss: 0.0007540610968135297 2023-01-22 08:32:36.118424: step: 224/77, loss: 0.00014220400771591812 2023-01-22 08:32:37.410500: step: 228/77, loss: 0.07779377698898315 2023-01-22 08:32:38.688892: step: 232/77, loss: 0.032325536012649536 2023-01-22 08:32:39.945136: step: 236/77, loss: 5.522904302779352e-06 2023-01-22 08:32:41.198513: step: 240/77, loss: 0.0035868207924067974 2023-01-22 08:32:42.524049: step: 244/77, loss: 0.004020586609840393 2023-01-22 08:32:43.805016: step: 248/77, loss: 0.0007980067748576403 2023-01-22 08:32:45.137485: step: 252/77, loss: 1.5416650057886727e-05 2023-01-22 08:32:46.413066: step: 256/77, loss: 1.2635971415875247e-06 2023-01-22 08:32:47.737128: step: 260/77, loss: 0.0012691058218479156 2023-01-22 08:32:48.990130: step: 264/77, loss: 0.016745002940297127 2023-01-22 08:32:50.292266: step: 268/77, loss: 0.0318622961640358 2023-01-22 08:32:51.618404: step: 272/77, loss: 8.759008051129058e-05 2023-01-22 08:32:52.909945: step: 276/77, loss: 0.0004965168191120028 2023-01-22 08:32:54.182852: step: 280/77, loss: 1.5795176011579315e-07 2023-01-22 08:32:55.433874: step: 284/77, loss: 2.293473153258674e-05 2023-01-22 08:32:56.754493: step: 288/77, loss: 3.388850382179953e-05 
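A note on the per-epoch evaluation dicts: the logged numbers are consistent with the standard F1 formula, with the 'combined' score being the product of the template F1 and the slot F1 (e.g. 0.7368421052631579 * 0.07029876977152899 = 0.05179909351586346, the Dev 'combined' value reported above). A minimal sketch of that arithmetic, assuming this derivation; the f1 helper below is illustrative and not code from train.py:

def f1(p: float, r: float) -> float:
    # Harmonic mean of precision and recall; taken as 0 when p + r == 0,
    # matching the 0.0 entries in the 'Sample Korean' dicts above.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

# Dev template/slot precision and recall as logged above.
template_f1 = f1(1.0, 0.5833333333333334)   # 0.7368421052631579
slot_f1 = f1(0.5, 0.03780718336483932)      # 0.07029876977152899
combined = template_f1 * slot_f1            # 0.05179909351586346
print(combined)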
2023-01-22 08:32:58.035112: step: 292/77, loss: 0.009840208105742931 2023-01-22 08:32:59.323782: step: 296/77, loss: 0.0010038908803835511 2023-01-22 08:33:00.640582: step: 300/77, loss: 4.372560579213314e-05 2023-01-22 08:33:01.901902: step: 304/77, loss: 0.0003445250040385872 2023-01-22 08:33:03.216858: step: 308/77, loss: 0.00039814409683458507 2023-01-22 08:33:04.509902: step: 312/77, loss: 0.0046538361348211765 2023-01-22 08:33:05.804147: step: 316/77, loss: 0.022742586210370064 2023-01-22 08:33:07.128954: step: 320/77, loss: 0.01748577691614628 2023-01-22 08:33:08.401142: step: 324/77, loss: 0.00017750824918039143 2023-01-22 08:33:09.674258: step: 328/77, loss: 3.159026960020128e-07 2023-01-22 08:33:10.938271: step: 332/77, loss: 0.00035073619801551104 2023-01-22 08:33:12.233105: step: 336/77, loss: 4.3746422306867316e-05 2023-01-22 08:33:13.537145: step: 340/77, loss: 0.045224498957395554 2023-01-22 08:33:14.875144: step: 344/77, loss: 0.03973131626844406 2023-01-22 08:33:16.126413: step: 348/77, loss: 0.0003504282212816179 2023-01-22 08:33:17.416488: step: 352/77, loss: 7.479263877030462e-05 2023-01-22 08:33:18.717224: step: 356/77, loss: 0.00043065831414423883 2023-01-22 08:33:20.004482: step: 360/77, loss: 0.0263584665954113 2023-01-22 08:33:21.292226: step: 364/77, loss: 7.684577576583251e-05 2023-01-22 08:33:22.581641: step: 368/77, loss: 7.579052908113226e-05 2023-01-22 08:33:23.882341: step: 372/77, loss: 0.005002635531127453 2023-01-22 08:33:25.159686: step: 376/77, loss: 0.047192104160785675 2023-01-22 08:33:26.421782: step: 380/77, loss: 0.0021299412474036217 2023-01-22 08:33:27.647392: step: 384/77, loss: 0.0011768265394493937 2023-01-22 08:33:28.916288: step: 388/77, loss: 0.02807861752808094 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Chinese: {'template': {'p': 0.974025974025974, 'r': 0.6097560975609756, 'f1': 0.7500000000000001}, 'slot': {'p': 0.5, 'r': 0.02, 'f1': 0.038461538461538464}, 'combined': 0.02884615384615385, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Korean: {'template': {'p': 0.974025974025974, 'r': 0.6097560975609756, 'f1': 0.7500000000000001}, 'slot': {'p': 0.5, 'r': 0.019090909090909092, 'f1': 0.03677758318739055}, 'combined': 0.027583187390542916, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Russian: {'template': {'p': 0.974025974025974, 'r': 0.6097560975609756, 'f1': 0.7500000000000001}, 'slot': {'p': 0.5116279069767442, 'r': 0.02, 'f1': 0.03849518810148731}, 'combined': 0.02887139107611549, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 
'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 23 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:35:04.550529: step: 4/77, loss: 0.000223344934056513 2023-01-22 08:35:05.817055: step: 8/77, loss: 0.0002499055990483612 2023-01-22 08:35:07.130214: step: 12/77, loss: 0.003980610053986311 2023-01-22 08:35:08.425983: step: 16/77, loss: 0.0001560335367685184 2023-01-22 08:35:09.662525: step: 20/77, loss: 0.009259466081857681 2023-01-22 08:35:10.951815: step: 24/77, loss: 0.005971284583210945 2023-01-22 08:35:12.261686: step: 28/77, loss: 0.0023534297943115234 2023-01-22 08:35:13.517184: step: 32/77, loss: 0.0012932337122038007 2023-01-22 08:35:14.774423: step: 36/77, loss: 0.06185340881347656 2023-01-22 08:35:16.066187: step: 40/77, loss: 0.027082540094852448 2023-01-22 08:35:17.332958: step: 44/77, loss: 5.294245056575164e-05 2023-01-22 08:35:18.633555: step: 48/77, loss: 1.238259415003995e-06 2023-01-22 08:35:19.901409: step: 52/77, loss: 2.2649683728559467e-07 2023-01-22 08:35:21.173506: step: 56/77, loss: 0.0020577346440404654 2023-01-22 08:35:22.460355: step: 60/77, loss: 0.0 2023-01-22 08:35:23.732907: step: 64/77, loss: 4.52091044280678e-05 2023-01-22 08:35:25.052400: step: 68/77, loss: 1.0430811769879256e-08 2023-01-22 08:35:26.362647: step: 72/77, loss: 0.0011307750828564167 2023-01-22 
08:35:27.635088: step: 76/77, loss: 5.794014214188792e-05 2023-01-22 08:35:28.895379: step: 80/77, loss: 1.281494945715167e-07 2023-01-22 08:35:30.146375: step: 84/77, loss: 0.000748545688111335 2023-01-22 08:35:31.397789: step: 88/77, loss: 3.0113683351373766e-06 2023-01-22 08:35:32.709516: step: 92/77, loss: 0.003903226926922798 2023-01-22 08:35:34.049925: step: 96/77, loss: 0.031046070158481598 2023-01-22 08:35:35.335342: step: 100/77, loss: 9.00256636668928e-05 2023-01-22 08:35:36.598607: step: 104/77, loss: 8.201535092666745e-05 2023-01-22 08:35:37.911529: step: 108/77, loss: 1.2367941337743105e-07 2023-01-22 08:35:39.188585: step: 112/77, loss: 0.0028015454299747944 2023-01-22 08:35:40.521074: step: 116/77, loss: 1.5853851209612912e-06 2023-01-22 08:35:41.819096: step: 120/77, loss: 1.0672863936633803e-05 2023-01-22 08:35:43.120597: step: 124/77, loss: 6.288173040047695e-07 2023-01-22 08:35:44.443865: step: 128/77, loss: 0.002125179162248969 2023-01-22 08:35:45.713574: step: 132/77, loss: 0.0003999666660092771 2023-01-22 08:35:46.968240: step: 136/77, loss: 0.06220989674329758 2023-01-22 08:35:48.239717: step: 140/77, loss: 0.00030411180341616273 2023-01-22 08:35:49.482620: step: 144/77, loss: 0.00021978866425342858 2023-01-22 08:35:50.803289: step: 148/77, loss: 0.0013275524834170938 2023-01-22 08:35:52.120869: step: 152/77, loss: 0.0007778856670483947 2023-01-22 08:35:53.447132: step: 156/77, loss: 0.03293122723698616 2023-01-22 08:35:54.708358: step: 160/77, loss: 1.7612574083614163e-06 2023-01-22 08:35:55.996977: step: 164/77, loss: 0.043627262115478516 2023-01-22 08:35:57.265003: step: 168/77, loss: 0.001226570806466043 2023-01-22 08:35:58.517952: step: 172/77, loss: 0.00017758070316631347 2023-01-22 08:35:59.832808: step: 176/77, loss: 3.354599903104827e-05 2023-01-22 08:36:01.123938: step: 180/77, loss: 0.03583592548966408 2023-01-22 08:36:02.383855: step: 184/77, loss: 5.370784492697567e-05 2023-01-22 08:36:03.691709: step: 188/77, loss: 0.0017548176692798734 2023-01-22 08:36:04.975638: step: 192/77, loss: 0.02631654031574726 2023-01-22 08:36:06.239982: step: 196/77, loss: 9.08508081920445e-06 2023-01-22 08:36:07.497855: step: 200/77, loss: 0.0017153595108538866 2023-01-22 08:36:08.870361: step: 204/77, loss: 0.00011920402175746858 2023-01-22 08:36:10.159484: step: 208/77, loss: 0.03767101839184761 2023-01-22 08:36:11.442740: step: 212/77, loss: 2.3928218070068397e-05 2023-01-22 08:36:12.765952: step: 216/77, loss: 3.788954472838668e-06 2023-01-22 08:36:14.023848: step: 220/77, loss: 0.0002056130178971216 2023-01-22 08:36:15.314578: step: 224/77, loss: 0.000769798643887043 2023-01-22 08:36:16.597123: step: 228/77, loss: 0.00010402412590337917 2023-01-22 08:36:17.886841: step: 232/77, loss: 0.00010068294795928523 2023-01-22 08:36:19.158569: step: 236/77, loss: 0.002481456147506833 2023-01-22 08:36:20.452161: step: 240/77, loss: 5.516312376130372e-05 2023-01-22 08:36:21.781995: step: 244/77, loss: 3.2243500754702836e-05 2023-01-22 08:36:23.082225: step: 248/77, loss: 0.0004081795923411846 2023-01-22 08:36:24.379633: step: 252/77, loss: 8.850173799146432e-06 2023-01-22 08:36:25.624685: step: 256/77, loss: 0.0003328286111354828 2023-01-22 08:36:26.923097: step: 260/77, loss: 0.0030909974593669176 2023-01-22 08:36:28.190904: step: 264/77, loss: 4.4013545448251534e-06 2023-01-22 08:36:29.489208: step: 268/77, loss: 0.012226199731230736 2023-01-22 08:36:30.819441: step: 272/77, loss: 0.0010382995242252946 2023-01-22 08:36:32.108565: step: 276/77, loss: 0.00013937246694695204 
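The step records above follow one fixed format (timestamp, a step counter that advances by 4, matching --accumulate_step in the command line, then the loss), and each epoch closes with a 'Loss:' summary that is consistent with the mean of that epoch's step losses rounded to three decimals. With --batch_size 10 and --accumulate_step 4, each logged step plausibly corresponds to one optimizer update over an effective batch of 40 examples. A small sketch for recovering the step losses from a saved copy of this log, assuming only the format shown here; the regex, parse_losses, and the train.log filename are illustrative, not part of train.py:

import re

# One match per record: "<timestamp>: step: <n>/77, loss: <value>"
STEP_RE = re.compile(r"step: (\d+)/\d+, loss: ([0-9.eE+-]+)")

def parse_losses(text: str) -> list[float]:
    # Every per-step loss value, in order of appearance in the log.
    return [float(m.group(2)) for m in STEP_RE.finditer(text)]

with open("train.log") as fh:  # hypothetical saved copy of this log
    losses = parse_losses(fh.read())
print(f"Loss: {sum(losses) / len(losses):.3f}")  # compare with the epoch summary line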
2023-01-22 08:36:33.426997: step: 280/77, loss: 0.021476108580827713 2023-01-22 08:36:34.735604: step: 284/77, loss: 0.009720050729811192 2023-01-22 08:36:36.060112: step: 288/77, loss: 0.005399320740252733 2023-01-22 08:36:37.360837: step: 292/77, loss: 4.12450208386872e-06 2023-01-22 08:36:38.588928: step: 296/77, loss: 0.0008546271128579974 2023-01-22 08:36:39.893498: step: 300/77, loss: 0.0015399702824652195 2023-01-22 08:36:41.181704: step: 304/77, loss: 0.0012370938202366233 2023-01-22 08:36:42.475891: step: 308/77, loss: 0.024090373888611794 2023-01-22 08:36:43.737108: step: 312/77, loss: 6.0301408666418865e-06 2023-01-22 08:36:45.036784: step: 316/77, loss: 1.606405567144975e-05 2023-01-22 08:36:46.322876: step: 320/77, loss: 0.0001153816920123063 2023-01-22 08:36:47.577252: step: 324/77, loss: 0.0015452578663825989 2023-01-22 08:36:48.868006: step: 328/77, loss: 0.0007853202987462282 2023-01-22 08:36:50.149053: step: 332/77, loss: 4.006969174952246e-05 2023-01-22 08:36:51.448644: step: 336/77, loss: 4.237548182572937e-06 2023-01-22 08:36:52.751816: step: 340/77, loss: 5.247162334853783e-05 2023-01-22 08:36:54.059704: step: 344/77, loss: 4.022320354124531e-05 2023-01-22 08:36:55.341919: step: 348/77, loss: 0.004062959458678961 2023-01-22 08:36:56.633675: step: 352/77, loss: 5.144433816894889e-05 2023-01-22 08:36:57.925392: step: 356/77, loss: 0.04348785802721977 2023-01-22 08:36:59.176867: step: 360/77, loss: 2.714705260586925e-05 2023-01-22 08:37:00.493920: step: 364/77, loss: 7.023421494523063e-05 2023-01-22 08:37:01.786956: step: 368/77, loss: 0.003611247520893812 2023-01-22 08:37:03.092189: step: 372/77, loss: 6.616061227759928e-07 2023-01-22 08:37:04.442766: step: 376/77, loss: 0.0027029793709516525 2023-01-22 08:37:05.744954: step: 380/77, loss: 3.5358483728487045e-05 2023-01-22 08:37:07.061168: step: 384/77, loss: 5.154731115908362e-05 2023-01-22 08:37:08.344471: step: 388/77, loss: 0.0008300709305331111 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Chinese: {'template': {'p': 0.9620253164556962, 'r': 0.6178861788617886, 'f1': 0.7524752475247525}, 'slot': {'p': 0.5106382978723404, 'r': 0.02181818181818182, 'f1': 0.041848299912816043}, 'combined': 0.03148980983538633, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Korean: {'template': {'p': 0.9615384615384616, 'r': 0.6097560975609756, 'f1': 0.7462686567164178}, 'slot': {'p': 0.5111111111111111, 'r': 0.02090909090909091, 'f1': 0.04017467248908297}, 'combined': 0.029981098872449975, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Russian: {'template': {'p': 0.9615384615384616, 'r': 0.6097560975609756, 'f1': 0.7462686567164178}, 'slot': {'p': 0.5217391304347826, 'r': 0.02181818181818182, 'f1': 0.041884816753926704}, 'combined': 0.031257325935766196, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 
0.03603603603603603, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 24 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:38:44.046383: step: 4/77, loss: 0.000768980011343956 2023-01-22 08:38:45.320085: step: 8/77, loss: 0.0007521238876506686 2023-01-22 08:38:46.618926: step: 12/77, loss: 0.0003945432836189866 2023-01-22 08:38:47.891944: step: 16/77, loss: 2.3855304789321963e-06 2023-01-22 08:38:49.154871: step: 20/77, loss: 0.0001138556472142227 2023-01-22 08:38:50.417714: step: 24/77, loss: 1.5491634258069098e-05 2023-01-22 08:38:51.716339: step: 28/77, loss: 3.6268065741751343e-06 2023-01-22 08:38:52.980242: step: 32/77, loss: 0.01696803607046604 2023-01-22 08:38:54.260937: step: 36/77, loss: 1.2886805961898062e-05 2023-01-22 08:38:55.502923: step: 40/77, loss: 0.05772382393479347 2023-01-22 08:38:56.771084: step: 44/77, loss: 8.849301957525313e-05 2023-01-22 08:38:58.076290: step: 48/77, loss: 3.100880803685868e-06 2023-01-22 08:38:59.331332: step: 52/77, loss: 0.021691516041755676 2023-01-22 08:39:00.580772: step: 56/77, loss: 0.00035898658097721636 2023-01-22 08:39:01.913161: step: 60/77, 
loss: 7.435634188368567e-07 2023-01-22 08:39:03.250587: step: 64/77, loss: 0.0022912865970283747 2023-01-22 08:39:04.597791: step: 68/77, loss: 1.6972255707514705e-06 2023-01-22 08:39:05.836954: step: 72/77, loss: 0.0005609336076304317 2023-01-22 08:39:07.138726: step: 76/77, loss: 9.501824933977332e-06 2023-01-22 08:39:08.442974: step: 80/77, loss: 0.0011109262704849243 2023-01-22 08:39:09.724449: step: 84/77, loss: 0.0012305447598919272 2023-01-22 08:39:11.012272: step: 88/77, loss: 0.0032722926698625088 2023-01-22 08:39:12.277506: step: 92/77, loss: 0.0003757534723263234 2023-01-22 08:39:13.600609: step: 96/77, loss: 0.0036070612259209156 2023-01-22 08:39:14.873257: step: 100/77, loss: 0.0010984732070937753 2023-01-22 08:39:16.164338: step: 104/77, loss: 0.011957976035773754 2023-01-22 08:39:17.451738: step: 108/77, loss: 0.00025756648392416537 2023-01-22 08:39:18.687939: step: 112/77, loss: 0.0005127987242303789 2023-01-22 08:39:19.993542: step: 116/77, loss: 3.300553362350911e-05 2023-01-22 08:39:21.326492: step: 120/77, loss: 0.0017934296047315001 2023-01-22 08:39:22.635476: step: 124/77, loss: 0.0026127288583666086 2023-01-22 08:39:23.960521: step: 128/77, loss: 0.0007074242457747459 2023-01-22 08:39:25.260369: step: 132/77, loss: 1.341101665275346e-07 2023-01-22 08:39:26.538675: step: 136/77, loss: 0.03210016340017319 2023-01-22 08:39:27.784814: step: 140/77, loss: 0.0003734312776941806 2023-01-22 08:39:29.093662: step: 144/77, loss: 4.366786015452817e-05 2023-01-22 08:39:30.404238: step: 148/77, loss: 2.6971114493790083e-05 2023-01-22 08:39:31.696591: step: 152/77, loss: 0.08323478698730469 2023-01-22 08:39:32.969140: step: 156/77, loss: 0.000449273589765653 2023-01-22 08:39:34.212482: step: 160/77, loss: 4.991859441361157e-07 2023-01-22 08:39:35.504671: step: 164/77, loss: 0.00018791876209434122 2023-01-22 08:39:36.767763: step: 168/77, loss: 0.0005903139826841652 2023-01-22 08:39:38.066184: step: 172/77, loss: 0.047918688505887985 2023-01-22 08:39:39.344689: step: 176/77, loss: 1.1026830293303647e-07 2023-01-22 08:39:40.619924: step: 180/77, loss: 1.6316491837642388e-06 2023-01-22 08:39:41.903881: step: 184/77, loss: 0.008988398127257824 2023-01-22 08:39:43.169771: step: 188/77, loss: 3.704776463564485e-05 2023-01-22 08:39:44.497613: step: 192/77, loss: 0.00014370733697433025 2023-01-22 08:39:45.754520: step: 196/77, loss: 2.8802598990296246e-06 2023-01-22 08:39:47.062571: step: 200/77, loss: 5.006774586036045e-07 2023-01-22 08:39:48.363486: step: 204/77, loss: 0.00034350191708654165 2023-01-22 08:39:49.655286: step: 208/77, loss: 0.03332750126719475 2023-01-22 08:39:50.958875: step: 212/77, loss: 1.9984458049293607e-05 2023-01-22 08:39:52.224721: step: 216/77, loss: 0.01212351955473423 2023-01-22 08:39:53.507475: step: 220/77, loss: 5.1427905418677256e-05 2023-01-22 08:39:54.755113: step: 224/77, loss: 0.02780834026634693 2023-01-22 08:39:55.992363: step: 228/77, loss: 0.00018175665172748268 2023-01-22 08:39:57.243163: step: 232/77, loss: 0.015692438930273056 2023-01-22 08:39:58.538171: step: 236/77, loss: 0.023305445909500122 2023-01-22 08:39:59.820213: step: 240/77, loss: 7.883200851210859e-06 2023-01-22 08:40:01.098457: step: 244/77, loss: 0.00015500333392992616 2023-01-22 08:40:02.326800: step: 248/77, loss: 0.0010909754782915115 2023-01-22 08:40:03.632226: step: 252/77, loss: 0.00029513833578675985 2023-01-22 08:40:04.952706: step: 256/77, loss: 2.437727061987971e-06 2023-01-22 08:40:06.252746: step: 260/77, loss: 4.5102613512426615e-05 2023-01-22 08:40:07.533049: step: 
264/77, loss: 0.00025953652220778167 2023-01-22 08:40:08.813058: step: 268/77, loss: 6.334707450150745e-06 2023-01-22 08:40:10.112807: step: 272/77, loss: 0.02517692744731903 2023-01-22 08:40:11.389772: step: 276/77, loss: 0.00019247032469138503 2023-01-22 08:40:12.720743: step: 280/77, loss: 2.308360672031995e-05 2023-01-22 08:40:13.970151: step: 284/77, loss: 0.02027995139360428 2023-01-22 08:40:15.169969: step: 288/77, loss: 0.0476166270673275 2023-01-22 08:40:16.448815: step: 292/77, loss: 3.230684524169192e-05 2023-01-22 08:40:17.738036: step: 296/77, loss: 1.7493573523097439e-06 2023-01-22 08:40:19.053444: step: 300/77, loss: 8.653674740344286e-06 2023-01-22 08:40:20.297221: step: 304/77, loss: 3.8240621506702155e-05 2023-01-22 08:40:21.574243: step: 308/77, loss: 6.724038030370139e-06 2023-01-22 08:40:22.796557: step: 312/77, loss: 0.0011275878641754389 2023-01-22 08:40:24.061384: step: 316/77, loss: 1.1667386843328131e-06 2023-01-22 08:40:25.346221: step: 320/77, loss: 0.030673200264573097 2023-01-22 08:40:26.606316: step: 324/77, loss: 0.0004541727830655873 2023-01-22 08:40:27.869602: step: 328/77, loss: 0.0002860415552277118 2023-01-22 08:40:29.134224: step: 332/77, loss: 0.00033971594530157745 2023-01-22 08:40:30.427183: step: 336/77, loss: 0.000280650652712211 2023-01-22 08:40:31.745197: step: 340/77, loss: 0.0005106690223328769 2023-01-22 08:40:33.048198: step: 344/77, loss: 3.868105068249861e-06 2023-01-22 08:40:34.352684: step: 348/77, loss: 4.0064624045044184e-05 2023-01-22 08:40:35.655732: step: 352/77, loss: 7.297834963537753e-05 2023-01-22 08:40:36.957855: step: 356/77, loss: 0.0032857412006706 2023-01-22 08:40:38.275127: step: 360/77, loss: 1.8298316035725293e-06 2023-01-22 08:40:39.544741: step: 364/77, loss: 0.000829779717605561 2023-01-22 08:40:40.841762: step: 368/77, loss: 3.9558733988087624e-05 2023-01-22 08:40:42.163231: step: 372/77, loss: 2.1275000108289532e-05 2023-01-22 08:40:43.489439: step: 376/77, loss: 2.1526575437746942e-05 2023-01-22 08:40:44.755945: step: 380/77, loss: 0.00012161017366452143 2023-01-22 08:40:46.076065: step: 384/77, loss: 0.0010571739403530955 2023-01-22 08:40:47.314929: step: 388/77, loss: 0.0003159367188345641 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Chinese: {'template': {'p': 0.961038961038961, 'r': 0.6016260162601627, 'f1': 0.74}, 'slot': {'p': 0.45098039215686275, 'r': 0.02090909090909091, 'f1': 0.03996524761077324}, 'combined': 0.029574283231972198, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Korean: {'template': {'p': 0.9615384615384616, 'r': 0.6097560975609756, 'f1': 0.7462686567164178}, 'slot': {'p': 0.46, 'r': 0.02090909090909091, 'f1': 0.04}, 'combined': 0.029850746268656716, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Russian: {'template': {'p': 0.9615384615384616, 'r': 0.6097560975609756, 'f1': 0.7462686567164178}, 'slot': {'p': 0.46, 'r': 0.02090909090909091, 'f1': 0.04}, 'combined': 0.029850746268656716, 'epoch': 
24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 24} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 25 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:42:22.686772: step: 4/77, loss: 0.005719621200114489 2023-01-22 08:42:23.977937: step: 8/77, loss: 0.008860756643116474 2023-01-22 08:42:25.291153: step: 12/77, loss: 0.0028032902628183365 2023-01-22 08:42:26.577586: step: 16/77, loss: 6.109435730650148e-07 2023-01-22 08:42:27.883127: step: 20/77, loss: 2.298797517141793e-05 2023-01-22 08:42:29.168162: step: 24/77, loss: 6.934415523573989e-06 2023-01-22 08:42:30.435144: step: 28/77, loss: 0.00017443645629100502 2023-01-22 08:42:31.738022: step: 32/77, loss: 3.634882523328997e-05 2023-01-22 08:42:33.011578: step: 36/77, loss: 7.09361211193027e-06 2023-01-22 08:42:34.310423: step: 40/77, loss: 0.016919534653425217 2023-01-22 08:42:35.590022: step: 44/77, loss: 8.22349829832092e-06 2023-01-22 08:42:36.817262: step: 48/77, loss: 0.0002756982285063714 2023-01-22 
08:42:38.086384: step: 52/77, loss: 5.055268957221415e-06 2023-01-22 08:42:39.346733: step: 56/77, loss: 0.0003850968205370009 2023-01-22 08:42:40.652495: step: 60/77, loss: 0.0004995691706426442 2023-01-22 08:42:41.932483: step: 64/77, loss: 0.001346847042441368 2023-01-22 08:42:43.189483: step: 68/77, loss: 0.0004229228070471436 2023-01-22 08:42:44.405717: step: 72/77, loss: 0.03281763941049576 2023-01-22 08:42:45.658816: step: 76/77, loss: 0.038519665598869324 2023-01-22 08:42:46.949622: step: 80/77, loss: 0.009992430917918682 2023-01-22 08:42:48.218483: step: 84/77, loss: 0.002286661881953478 2023-01-22 08:42:49.458950: step: 88/77, loss: 0.0002374086616327986 2023-01-22 08:42:50.753262: step: 92/77, loss: 8.106205496005714e-05 2023-01-22 08:42:52.007325: step: 96/77, loss: 0.0010453971335664392 2023-01-22 08:42:53.331912: step: 100/77, loss: 0.0001758149592205882 2023-01-22 08:42:54.619930: step: 104/77, loss: 0.009996388107538223 2023-01-22 08:42:55.932431: step: 108/77, loss: 0.0004022825451102108 2023-01-22 08:42:57.179840: step: 112/77, loss: 0.004816494882106781 2023-01-22 08:42:58.446808: step: 116/77, loss: 0.005425009410828352 2023-01-22 08:42:59.715900: step: 120/77, loss: 2.0414493917542131e-07 2023-01-22 08:43:01.003714: step: 124/77, loss: 9.027479973156005e-05 2023-01-22 08:43:02.240733: step: 128/77, loss: 0.06450998783111572 2023-01-22 08:43:03.539803: step: 132/77, loss: 9.049760410562158e-05 2023-01-22 08:43:04.791225: step: 136/77, loss: 5.903685814701021e-05 2023-01-22 08:43:06.044023: step: 140/77, loss: 4.318053015595069e-06 2023-01-22 08:43:07.313686: step: 144/77, loss: 2.9786574486934114e-06 2023-01-22 08:43:08.625234: step: 148/77, loss: 3.1425793167727534e-06 2023-01-22 08:43:09.904414: step: 152/77, loss: 9.415208478458226e-05 2023-01-22 08:43:11.204943: step: 156/77, loss: 5.3753981774207205e-05 2023-01-22 08:43:12.492293: step: 160/77, loss: 0.0004957958590239286 2023-01-22 08:43:13.768681: step: 164/77, loss: 0.0008796628098934889 2023-01-22 08:43:15.056151: step: 168/77, loss: 0.002826994052156806 2023-01-22 08:43:16.324978: step: 172/77, loss: 0.00016862266056705266 2023-01-22 08:43:17.561491: step: 176/77, loss: 3.725287101019603e-08 2023-01-22 08:43:18.823521: step: 180/77, loss: 0.00611914461478591 2023-01-22 08:43:20.113361: step: 184/77, loss: 0.0009060569573193789 2023-01-22 08:43:21.467624: step: 188/77, loss: 8.879319648258388e-05 2023-01-22 08:43:22.699211: step: 192/77, loss: 0.00410617096349597 2023-01-22 08:43:23.995744: step: 196/77, loss: 0.001179800252430141 2023-01-22 08:43:25.261597: step: 200/77, loss: 1.5466710010514362e-06 2023-01-22 08:43:26.476724: step: 204/77, loss: 0.00045527133625000715 2023-01-22 08:43:27.753538: step: 208/77, loss: 0.006096724420785904 2023-01-22 08:43:29.094607: step: 212/77, loss: 0.05618688464164734 2023-01-22 08:43:30.392295: step: 216/77, loss: 0.0006208279519341886 2023-01-22 08:43:31.702924: step: 220/77, loss: 1.430508973498945e-07 2023-01-22 08:43:33.048783: step: 224/77, loss: 4.76835111840046e-07 2023-01-22 08:43:34.360290: step: 228/77, loss: 0.0012825513258576393 2023-01-22 08:43:35.652059: step: 232/77, loss: 0.00010251560888718814 2023-01-22 08:43:36.920799: step: 236/77, loss: 0.00014983654546085745 2023-01-22 08:43:38.190877: step: 240/77, loss: 0.0007250534254126251 2023-01-22 08:43:39.488202: step: 244/77, loss: 3.565517909009941e-05 2023-01-22 08:43:40.772028: step: 248/77, loss: 3.2824344089021906e-05 2023-01-22 08:43:42.042324: step: 252/77, loss: 0.0004473893204703927 2023-01-22 
08:43:43.337439: step: 256/77, loss: 0.0002924882574006915 2023-01-22 08:43:44.579068: step: 260/77, loss: 0.0029194727540016174 2023-01-22 08:43:45.858295: step: 264/77, loss: 0.00955183431506157 2023-01-22 08:43:47.147546: step: 268/77, loss: 0.027585407719016075 2023-01-22 08:43:48.416657: step: 272/77, loss: 1.8800383259076625e-05 2023-01-22 08:43:49.698427: step: 276/77, loss: 0.0003292300389148295 2023-01-22 08:43:50.961727: step: 280/77, loss: 6.807313184253871e-05 2023-01-22 08:43:52.268529: step: 284/77, loss: 0.001798221142962575 2023-01-22 08:43:53.595141: step: 288/77, loss: 0.0032155895605683327 2023-01-22 08:43:54.878085: step: 292/77, loss: 1.2091225471522193e-05 2023-01-22 08:43:56.190413: step: 296/77, loss: 0.00014940323308110237 2023-01-22 08:43:57.464521: step: 300/77, loss: 0.018790228292346 2023-01-22 08:43:58.779740: step: 304/77, loss: 4.5596917175316776e-07 2023-01-22 08:44:00.093341: step: 308/77, loss: 5.038719609729014e-05 2023-01-22 08:44:01.424885: step: 312/77, loss: 0.00027558201691135764 2023-01-22 08:44:02.760302: step: 316/77, loss: 0.0001998369989451021 2023-01-22 08:44:04.051428: step: 320/77, loss: 0.00041272686212323606 2023-01-22 08:44:05.335614: step: 324/77, loss: 6.429352652048692e-06 2023-01-22 08:44:06.631295: step: 328/77, loss: 1.4448265574174002e-05 2023-01-22 08:44:07.928833: step: 332/77, loss: 0.0008989330381155014 2023-01-22 08:44:09.249221: step: 336/77, loss: 0.009158177301287651 2023-01-22 08:44:10.524273: step: 340/77, loss: 9.308809239882976e-06 2023-01-22 08:44:11.823462: step: 344/77, loss: 0.0002032999909715727 2023-01-22 08:44:13.113450: step: 348/77, loss: 8.418882089245017e-07 2023-01-22 08:44:14.433935: step: 352/77, loss: 9.536737621829161e-08 2023-01-22 08:44:15.689074: step: 356/77, loss: 0.0013869872782379389 2023-01-22 08:44:16.974520: step: 360/77, loss: 0.00016957623302005231 2023-01-22 08:44:18.284574: step: 364/77, loss: 0.0033187230583280325 2023-01-22 08:44:19.582231: step: 368/77, loss: 0.00044626800809055567 2023-01-22 08:44:20.864478: step: 372/77, loss: 0.0005619988078251481 2023-01-22 08:44:22.142827: step: 376/77, loss: 4.350573817646364e-06 2023-01-22 08:44:23.407601: step: 380/77, loss: 0.002264243783429265 2023-01-22 08:44:24.668365: step: 384/77, loss: 1.1637284842436202e-06 2023-01-22 08:44:25.982792: step: 388/77, loss: 0.026438845321536064 ================================================== Loss: 0.004 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.46511627906976744, 'r': 0.03780718336483932, 'f1': 0.06993006993006994}, 'combined': 0.05152741994847258, 'epoch': 25} Test Chinese: {'template': {'p': 0.9487179487179487, 'r': 0.6016260162601627, 'f1': 0.736318407960199}, 'slot': {'p': 0.5, 'r': 0.02, 'f1': 0.038461538461538464}, 'combined': 0.028319938767699962, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.46511627906976744, 'r': 0.03780718336483932, 'f1': 0.06993006993006994}, 'combined': 0.05152741994847258, 'epoch': 25} Test Korean: {'template': {'p': 0.9493670886075949, 'r': 0.6097560975609756, 'f1': 0.7425742574257426}, 'slot': {'p': 0.5116279069767442, 'r': 0.02, 'f1': 0.03849518810148731}, 'combined': 0.02858553571892622, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.46511627906976744, 'r': 0.03780718336483932, 'f1': 0.06993006993006994}, 'combined': 0.05152741994847258, 'epoch': 
25} Test Russian: {'template': {'p': 0.9487179487179487, 'r': 0.6016260162601627, 'f1': 0.736318407960199}, 'slot': {'p': 0.4888888888888889, 'r': 0.02, 'f1': 0.03842794759825328}, 'combined': 0.028295205196723804, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 26 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:46:01.789369: step: 4/77, loss: 7.405794235637586e-07 2023-01-22 08:46:03.060253: step: 8/77, loss: 4.420810910232831e-06 2023-01-22 08:46:04.390209: step: 12/77, loss: 0.009897311218082905 2023-01-22 08:46:05.637682: step: 16/77, loss: 6.879275588289602e-06 2023-01-22 08:46:06.960144: step: 20/77, loss: 0.001125571085140109 2023-01-22 08:46:08.224507: step: 24/77, loss: 5.025583959650248e-05 2023-01-22 08:46:09.554546: step: 28/77, loss: 0.03362608328461647 2023-01-22 08:46:10.835043: step: 32/77, loss: 0.00027511155349202454 
2023-01-22 08:46:12.131517: step: 36/77, loss: 0.012906921096146107 2023-01-22 08:46:13.425513: step: 40/77, loss: 6.1619521147804335e-06 2023-01-22 08:46:14.623999: step: 44/77, loss: 1.0552851563261356e-05 2023-01-22 08:46:15.877364: step: 48/77, loss: 0.0006020730943419039 2023-01-22 08:46:17.156901: step: 52/77, loss: 0.005116917658597231 2023-01-22 08:46:18.454779: step: 56/77, loss: 1.1251551768509671e-05 2023-01-22 08:46:19.753779: step: 60/77, loss: 0.024727124720811844 2023-01-22 08:46:21.056560: step: 64/77, loss: 8.761744538787752e-07 2023-01-22 08:46:22.354002: step: 68/77, loss: 0.006382717750966549 2023-01-22 08:46:23.595633: step: 72/77, loss: 0.0001176731166196987 2023-01-22 08:46:24.874577: step: 76/77, loss: 8.199903095373884e-05 2023-01-22 08:46:26.184012: step: 80/77, loss: 0.007674494292587042 2023-01-22 08:46:27.488069: step: 84/77, loss: 0.08472293615341187 2023-01-22 08:46:28.801989: step: 88/77, loss: 1.7702009245113004e-06 2023-01-22 08:46:30.063353: step: 92/77, loss: 0.0014696570578962564 2023-01-22 08:46:31.377484: step: 96/77, loss: 4.095823442185065e-06 2023-01-22 08:46:32.649838: step: 100/77, loss: 5.805840191897005e-05 2023-01-22 08:46:33.908305: step: 104/77, loss: 0.003786920104175806 2023-01-22 08:46:35.173393: step: 108/77, loss: 0.012779447250068188 2023-01-22 08:46:36.444376: step: 112/77, loss: 1.1473886019075508e-07 2023-01-22 08:46:37.718939: step: 116/77, loss: 4.9697860958985984e-05 2023-01-22 08:46:39.035739: step: 120/77, loss: 3.697064676089212e-05 2023-01-22 08:46:40.267339: step: 124/77, loss: 0.0015872609801590443 2023-01-22 08:46:41.530983: step: 128/77, loss: 0.006903848610818386 2023-01-22 08:46:42.806492: step: 132/77, loss: 0.005575467366725206 2023-01-22 08:46:44.097725: step: 136/77, loss: 0.005671120248734951 2023-01-22 08:46:45.398765: step: 140/77, loss: 4.039318810100667e-05 2023-01-22 08:46:46.679688: step: 144/77, loss: 0.00020730840333271772 2023-01-22 08:46:47.968983: step: 148/77, loss: 9.252969903172925e-06 2023-01-22 08:46:49.283876: step: 152/77, loss: 0.0001278579729842022 2023-01-22 08:46:50.609441: step: 156/77, loss: 0.027770182117819786 2023-01-22 08:46:51.888948: step: 160/77, loss: 4.7683688109145805e-08 2023-01-22 08:46:53.173209: step: 164/77, loss: 0.00022923552023712546 2023-01-22 08:46:54.475281: step: 168/77, loss: 2.0663854229496792e-05 2023-01-22 08:46:55.798652: step: 172/77, loss: 4.5862248953199014e-05 2023-01-22 08:46:57.092565: step: 176/77, loss: 0.0016720297280699015 2023-01-22 08:46:58.355111: step: 180/77, loss: 0.00010752508387668058 2023-01-22 08:46:59.693878: step: 184/77, loss: 0.0006299956585280597 2023-01-22 08:47:00.981782: step: 188/77, loss: 0.011298703029751778 2023-01-22 08:47:02.267033: step: 192/77, loss: 6.590948032680899e-05 2023-01-22 08:47:03.539975: step: 196/77, loss: 5.185305781196803e-06 2023-01-22 08:47:04.826975: step: 200/77, loss: 2.2649717834610783e-07 2023-01-22 08:47:06.130317: step: 204/77, loss: 0.0001983733382076025 2023-01-22 08:47:07.441152: step: 208/77, loss: 3.112400372629054e-05 2023-01-22 08:47:08.711082: step: 212/77, loss: 2.7714126190403476e-05 2023-01-22 08:47:10.083914: step: 216/77, loss: 0.00013845518697053194 2023-01-22 08:47:11.410263: step: 220/77, loss: 3.977232699980959e-05 2023-01-22 08:47:12.714311: step: 224/77, loss: 0.000929908303078264 2023-01-22 08:47:14.020387: step: 228/77, loss: 1.7579246559762396e-05 2023-01-22 08:47:15.318024: step: 232/77, loss: 0.0007625438156537712 2023-01-22 08:47:16.626862: step: 236/77, loss: 
4.9652739107841626e-05 2023-01-22 08:47:17.938443: step: 240/77, loss: 1.2576102790262667e-06 2023-01-22 08:47:19.248316: step: 244/77, loss: 8.551467908546329e-05 2023-01-22 08:47:20.513457: step: 248/77, loss: 0.000155415793415159 2023-01-22 08:47:21.828629: step: 252/77, loss: 7.673896789128776e-07 2023-01-22 08:47:23.103339: step: 256/77, loss: 1.6112244338728487e-05 2023-01-22 08:47:24.427170: step: 260/77, loss: 0.020630180835723877 2023-01-22 08:47:25.698616: step: 264/77, loss: 0.0011570448987185955 2023-01-22 08:47:26.944183: step: 268/77, loss: 6.015104190737475e-06 2023-01-22 08:47:28.226889: step: 272/77, loss: 3.4272662929879516e-08 2023-01-22 08:47:29.524162: step: 276/77, loss: 0.018049897626042366 2023-01-22 08:47:30.849676: step: 280/77, loss: 0.00012221011274959892 2023-01-22 08:47:32.134601: step: 284/77, loss: 0.009114380925893784 2023-01-22 08:47:33.374058: step: 288/77, loss: 0.0005977398250252008 2023-01-22 08:47:34.665143: step: 292/77, loss: 0.002121040364727378 2023-01-22 08:47:35.916367: step: 296/77, loss: 1.7078427845262922e-05 2023-01-22 08:47:37.219232: step: 300/77, loss: 1.7234993720194325e-05 2023-01-22 08:47:38.561114: step: 304/77, loss: 2.008646333706565e-06 2023-01-22 08:47:39.907921: step: 308/77, loss: 1.3609464986075182e-05 2023-01-22 08:47:41.236781: step: 312/77, loss: 1.9376695490791462e-05 2023-01-22 08:47:42.506718: step: 316/77, loss: 0.0002871832111850381 2023-01-22 08:47:43.790223: step: 320/77, loss: 8.973329386208206e-06 2023-01-22 08:47:45.100467: step: 324/77, loss: 0.0001281930017285049 2023-01-22 08:47:46.392584: step: 328/77, loss: 0.006611789111047983 2023-01-22 08:47:47.664572: step: 332/77, loss: 6.0470832977443933e-05 2023-01-22 08:47:48.987977: step: 336/77, loss: 0.00381635595113039 2023-01-22 08:47:50.311990: step: 340/77, loss: 1.8114980775862932e-05 2023-01-22 08:47:51.610067: step: 344/77, loss: 0.0001864713995018974 2023-01-22 08:47:52.887634: step: 348/77, loss: 0.017800893634557724 2023-01-22 08:47:54.217602: step: 352/77, loss: 0.008395765908062458 2023-01-22 08:47:55.484497: step: 356/77, loss: 1.6391246049352048e-07 2023-01-22 08:47:56.761328: step: 360/77, loss: 0.0011608715867623687 2023-01-22 08:47:58.083785: step: 364/77, loss: 2.5806618850765517e-06 2023-01-22 08:47:59.382069: step: 368/77, loss: 0.00028230599127709866 2023-01-22 08:48:00.690967: step: 372/77, loss: 6.524189757328713e-06 2023-01-22 08:48:01.964173: step: 376/77, loss: 5.015195711166598e-06 2023-01-22 08:48:03.229803: step: 380/77, loss: 0.00017462043615523726 2023-01-22 08:48:04.531220: step: 384/77, loss: 2.010021489695646e-06 2023-01-22 08:48:05.835408: step: 388/77, loss: 9.521669562673196e-05 ================================================== Loss: 0.004 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 26} Test Chinese: {'template': {'p': 0.9487179487179487, 'r': 0.6016260162601627, 'f1': 0.736318407960199}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.02955553819037448, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 26} Test Korean: {'template': {'p': 0.9493670886075949, 'r': 0.6097560975609756, 'f1': 0.7425742574257426}, 'slot': {'p': 0.48936170212765956, 'r': 
0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.02978065897261042, 'epoch': 26} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 26} Test Russian: {'template': {'p': 0.9487179487179487, 'r': 0.6016260162601627, 'f1': 0.736318407960199}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.02952977050232707, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 27 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:49:42.204409: step: 4/77, loss: 4.3786039896076545e-05 2023-01-22 08:49:43.514557: step: 8/77, loss: 1.3262005893466267e-07 2023-01-22 08:49:44.825300: step: 12/77, loss: 0.004400265868753195 2023-01-22 08:49:46.043937: step: 16/77, loss: 1.0698581718315836e-06 2023-01-22 
08:49:47.350285: step: 20/77, loss: 0.023978853598237038 2023-01-22 08:49:48.616657: step: 24/77, loss: 4.8583251555101015e-06 2023-01-22 08:49:49.878423: step: 28/77, loss: 0.03092852607369423 2023-01-22 08:49:51.153380: step: 32/77, loss: 0.0003409860364627093 2023-01-22 08:49:52.454080: step: 36/77, loss: 6.979777390370145e-05 2023-01-22 08:49:53.712970: step: 40/77, loss: 0.0005804693792015314 2023-01-22 08:49:55.036055: step: 44/77, loss: 0.011256477795541286 2023-01-22 08:49:56.291214: step: 48/77, loss: 1.1070699656556826e-05 2023-01-22 08:49:57.589635: step: 52/77, loss: 3.316633183203521e-06 2023-01-22 08:49:58.852743: step: 56/77, loss: 3.561252924555447e-06 2023-01-22 08:50:00.084386: step: 60/77, loss: 9.22357571653265e-07 2023-01-22 08:50:01.380555: step: 64/77, loss: 1.6862717529875226e-05 2023-01-22 08:50:02.661693: step: 68/77, loss: 0.00019232039630878717 2023-01-22 08:50:03.936329: step: 72/77, loss: 0.016592321917414665 2023-01-22 08:50:05.203767: step: 76/77, loss: 1.5529300071648322e-05 2023-01-22 08:50:06.525728: step: 80/77, loss: 1.6067247997852974e-05 2023-01-22 08:50:07.774602: step: 84/77, loss: 8.976113167591393e-05 2023-01-22 08:50:09.071093: step: 88/77, loss: 0.04106712341308594 2023-01-22 08:50:10.355501: step: 92/77, loss: 1.9371507065102378e-08 2023-01-22 08:50:11.691857: step: 96/77, loss: 2.0861591565335402e-07 2023-01-22 08:50:12.946040: step: 100/77, loss: 2.2351732908987287e-08 2023-01-22 08:50:14.241921: step: 104/77, loss: 0.0007628971361555159 2023-01-22 08:50:15.485762: step: 108/77, loss: 2.9802318390892424e-09 2023-01-22 08:50:16.808003: step: 112/77, loss: 2.0861618210687993e-08 2023-01-22 08:50:18.107019: step: 116/77, loss: 0.0005382683593779802 2023-01-22 08:50:19.418066: step: 120/77, loss: 2.1457547916270414e-07 2023-01-22 08:50:20.723590: step: 124/77, loss: 3.502124309306964e-05 2023-01-22 08:50:22.076117: step: 128/77, loss: 3.145249866065569e-05 2023-01-22 08:50:23.315951: step: 132/77, loss: 0.00019145975238643587 2023-01-22 08:50:24.610622: step: 136/77, loss: 0.0002411496825516224 2023-01-22 08:50:25.901420: step: 140/77, loss: 0.0012163245119154453 2023-01-22 08:50:27.156201: step: 144/77, loss: 0.00010317091800970957 2023-01-22 08:50:28.468066: step: 148/77, loss: 4.620007985067787e-06 2023-01-22 08:50:29.756163: step: 152/77, loss: 8.60757427290082e-05 2023-01-22 08:50:31.020817: step: 156/77, loss: 6.705515431804088e-08 2023-01-22 08:50:32.301495: step: 160/77, loss: 0.00013355020200833678 2023-01-22 08:50:33.618890: step: 164/77, loss: 0.005990986712276936 2023-01-22 08:50:34.893352: step: 168/77, loss: 0.00027787365252152085 2023-01-22 08:50:36.124132: step: 172/77, loss: 0.04725373163819313 2023-01-22 08:50:37.400863: step: 176/77, loss: 7.853271381463856e-05 2023-01-22 08:50:38.708780: step: 180/77, loss: 0.00018676927720662206 2023-01-22 08:50:39.930412: step: 184/77, loss: 0.006754858419299126 2023-01-22 08:50:41.242366: step: 188/77, loss: 8.532601350452751e-05 2023-01-22 08:50:42.517720: step: 192/77, loss: 0.0007004007347859442 2023-01-22 08:50:43.806485: step: 196/77, loss: 0.0011839126236736774 2023-01-22 08:50:45.070401: step: 200/77, loss: 0.0004985693376511335 2023-01-22 08:50:46.334920: step: 204/77, loss: 1.244201371264353e-06 2023-01-22 08:50:47.609360: step: 208/77, loss: 0.08877816051244736 2023-01-22 08:50:48.845635: step: 212/77, loss: 1.2874467074652785e-06 2023-01-22 08:50:50.168457: step: 216/77, loss: 6.705516852889559e-08 2023-01-22 08:50:51.437744: step: 220/77, loss: 1.627982419449836e-05 2023-01-22 
08:50:52.735358: step: 224/77, loss: 6.169013317958161e-07 2023-01-22 08:50:54.018813: step: 228/77, loss: 5.7242756156483665e-05 2023-01-22 08:50:55.286662: step: 232/77, loss: 0.00034215141204185784 2023-01-22 08:50:56.546358: step: 236/77, loss: 0.0015011318027973175 2023-01-22 08:50:57.870410: step: 240/77, loss: 0.0012336316285654902 2023-01-22 08:50:59.154473: step: 244/77, loss: 0.00022003508638590574 2023-01-22 08:51:00.405855: step: 248/77, loss: 0.01599096693098545 2023-01-22 08:51:01.680428: step: 252/77, loss: 9.920450247591361e-05 2023-01-22 08:51:02.968380: step: 256/77, loss: 0.00031245272839441895 2023-01-22 08:51:04.265470: step: 260/77, loss: 1.2031691767333541e-05 2023-01-22 08:51:05.549489: step: 264/77, loss: 9.238696208058172e-08 2023-01-22 08:51:06.839393: step: 268/77, loss: 6.817108805989847e-05 2023-01-22 08:51:08.148512: step: 272/77, loss: 9.894110917230137e-07 2023-01-22 08:51:09.475172: step: 276/77, loss: 0.0005339889321476221 2023-01-22 08:51:10.717444: step: 280/77, loss: 0.00012003527081105858 2023-01-22 08:51:11.987275: step: 284/77, loss: 1.629951293580234e-05 2023-01-22 08:51:13.300460: step: 288/77, loss: 7.010930858086795e-05 2023-01-22 08:51:14.639858: step: 292/77, loss: 1.5854463981668232e-06 2023-01-22 08:51:15.912409: step: 296/77, loss: 1.568728475831449e-05 2023-01-22 08:51:17.245670: step: 300/77, loss: 0.003171185264363885 2023-01-22 08:51:18.604417: step: 304/77, loss: 4.264489234628854e-06 2023-01-22 08:51:19.870486: step: 308/77, loss: 0.007752659730613232 2023-01-22 08:51:21.182725: step: 312/77, loss: 4.3374842789489776e-05 2023-01-22 08:51:22.481863: step: 316/77, loss: 0.00010408271919004619 2023-01-22 08:51:23.805695: step: 320/77, loss: 0.0005090486956760287 2023-01-22 08:51:25.058679: step: 324/77, loss: 9.227226109942421e-05 2023-01-22 08:51:26.293743: step: 328/77, loss: 0.003174896351993084 2023-01-22 08:51:27.564767: step: 332/77, loss: 0.0017001423984766006 2023-01-22 08:51:28.833021: step: 336/77, loss: 0.00042848309385590255 2023-01-22 08:51:30.150559: step: 340/77, loss: 0.0005391308222897351 2023-01-22 08:51:31.419992: step: 344/77, loss: 0.0006425658357329667 2023-01-22 08:51:32.768944: step: 348/77, loss: 1.4663381080026738e-05 2023-01-22 08:51:34.065453: step: 352/77, loss: 5.437660729512572e-05 2023-01-22 08:51:35.404663: step: 356/77, loss: 0.14871054887771606 2023-01-22 08:51:36.710997: step: 360/77, loss: 2.2351734685344127e-08 2023-01-22 08:51:37.992204: step: 364/77, loss: 0.0003183669177815318 2023-01-22 08:51:39.365182: step: 368/77, loss: 2.8474557893787278e-06 2023-01-22 08:51:40.647338: step: 372/77, loss: 8.405734115513042e-05 2023-01-22 08:51:41.972802: step: 376/77, loss: 3.2782395464892033e-07 2023-01-22 08:51:43.261133: step: 380/77, loss: 3.4272460425199824e-07 2023-01-22 08:51:44.592602: step: 384/77, loss: 4.10609118262073e-06 2023-01-22 08:51:45.900529: step: 388/77, loss: 0.018402086570858955 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27} Test Chinese: {'template': {'p': 0.9518072289156626, 'r': 0.6422764227642277, 'f1': 0.7669902912621358}, 'slot': {'p': 0.5, 'r': 0.022727272727272728, 'f1': 0.04347826086956522}, 'combined': 0.03334740396791895, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 
0.48717948717948717, 'r': 0.035916824196597356, 'f1': 0.06690140845070423}, 'combined': 0.04929577464788733, 'epoch': 27} Test Korean: {'template': {'p': 0.9634146341463414, 'r': 0.6422764227642277, 'f1': 0.7707317073170733}, 'slot': {'p': 0.5102040816326531, 'r': 0.022727272727272728, 'f1': 0.043516100957354226}, 'combined': 0.03353923878664375, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27} Test Russian: {'template': {'p': 0.9634146341463414, 'r': 0.6422764227642277, 'f1': 0.7707317073170733}, 'slot': {'p': 0.5102040816326531, 'r': 0.022727272727272728, 'f1': 0.043516100957354226}, 'combined': 0.03353923878664375, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ****************************** Epoch: 28 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:53:26.457994: 
step: 4/77, loss: 1.582411414347007e-06 2023-01-22 08:53:27.747116: step: 8/77, loss: 0.00010717161057982594 2023-01-22 08:53:29.018612: step: 12/77, loss: 1.4007048321218463e-07 2023-01-22 08:53:30.336842: step: 16/77, loss: 8.115675154840574e-05 2023-01-22 08:53:31.637443: step: 20/77, loss: 0.03191979229450226 2023-01-22 08:53:32.964779: step: 24/77, loss: 1.1622893225649022e-07 2023-01-22 08:53:34.337035: step: 28/77, loss: 3.9084603486116976e-05 2023-01-22 08:53:35.609557: step: 32/77, loss: 0.002637272235006094 2023-01-22 08:53:36.886311: step: 36/77, loss: 0.05554957687854767 2023-01-22 08:53:38.190111: step: 40/77, loss: 3.560824188753031e-05 2023-01-22 08:53:39.452662: step: 44/77, loss: 1.0132781369520671e-07 2023-01-22 08:53:40.745190: step: 48/77, loss: 0.000232343387324363 2023-01-22 08:53:41.989383: step: 52/77, loss: 1.0445479574627825e-06 2023-01-22 08:53:43.256753: step: 56/77, loss: 1.3514809324988164e-06 2023-01-22 08:53:44.535079: step: 60/77, loss: 1.9591614545788616e-05 2023-01-22 08:53:45.835840: step: 64/77, loss: 0.04506542533636093 2023-01-22 08:53:47.185540: step: 68/77, loss: 4.386712680570781e-05 2023-01-22 08:53:48.465556: step: 72/77, loss: 7.815002390998416e-06 2023-01-22 08:53:49.794323: step: 76/77, loss: 3.305016434751451e-05 2023-01-22 08:53:51.081381: step: 80/77, loss: 0.0001534569018986076 2023-01-22 08:53:52.415492: step: 84/77, loss: 0.0012685380643233657 2023-01-22 08:53:53.680794: step: 88/77, loss: 7.361089160440315e-07 2023-01-22 08:53:54.996876: step: 92/77, loss: 2.9308637294889195e-06 2023-01-22 08:53:56.313193: step: 96/77, loss: 0.0008253856794908643 2023-01-22 08:53:57.570142: step: 100/77, loss: 0.00027290164143778384 2023-01-22 08:53:58.840351: step: 104/77, loss: 0.0004043147200718522 2023-01-22 08:54:00.143504: step: 108/77, loss: 0.00016128386778291315 2023-01-22 08:54:01.433743: step: 112/77, loss: 0.005868277978152037 2023-01-22 08:54:02.733202: step: 116/77, loss: 1.4603088516196294e-07 2023-01-22 08:54:04.018145: step: 120/77, loss: 0.0019091747235506773 2023-01-22 08:54:05.333408: step: 124/77, loss: 0.011760505847632885 2023-01-22 08:54:06.661415: step: 128/77, loss: 0.00010481792560312897 2023-01-22 08:54:07.969672: step: 132/77, loss: 1.921913462865632e-05 2023-01-22 08:54:09.271435: step: 136/77, loss: 2.227662662335206e-06 2023-01-22 08:54:10.573811: step: 140/77, loss: 5.334582056093495e-07 2023-01-22 08:54:11.929592: step: 144/77, loss: 3.588026402212563e-06 2023-01-22 08:54:13.240868: step: 148/77, loss: 0.00016857915034051985 2023-01-22 08:54:14.538831: step: 152/77, loss: 0.031219307333230972 2023-01-22 08:54:15.868136: step: 156/77, loss: 0.0020155508536845446 2023-01-22 08:54:17.146287: step: 160/77, loss: 1.321755553362891e-05 2023-01-22 08:54:18.399619: step: 164/77, loss: 0.018532564863562584 2023-01-22 08:54:19.712076: step: 168/77, loss: 1.5903380699455738e-05 2023-01-22 08:54:21.033446: step: 172/77, loss: 0.024859033524990082 2023-01-22 08:54:22.328128: step: 176/77, loss: 2.0681964087998495e-06 2023-01-22 08:54:23.575537: step: 180/77, loss: 2.6700056423578644e-06 2023-01-22 08:54:24.870709: step: 184/77, loss: 1.8923403786175186e-06 2023-01-22 08:54:26.152295: step: 188/77, loss: 1.3589453828899423e-06 2023-01-22 08:54:27.386309: step: 192/77, loss: 0.04262559115886688 2023-01-22 08:54:28.720428: step: 196/77, loss: 8.77968250279082e-06 2023-01-22 08:54:29.987568: step: 200/77, loss: 0.0001077373162843287 2023-01-22 08:54:31.308588: step: 204/77, loss: 0.00020571955246850848 2023-01-22 08:54:32.620863: step: 
208/77, loss: 0.0005280431942082942 2023-01-22 08:54:33.944431: step: 212/77, loss: 1.048697049554903e-05 2023-01-22 08:54:35.238460: step: 216/77, loss: 4.216946035739966e-05 2023-01-22 08:54:36.483759: step: 220/77, loss: 4.079658083355753e-06 2023-01-22 08:54:37.754166: step: 224/77, loss: 0.0004498214984778315 2023-01-22 08:54:39.054122: step: 228/77, loss: 0.0011945064179599285 2023-01-22 08:54:40.325417: step: 232/77, loss: 8.898941814550199e-06 2023-01-22 08:54:41.603818: step: 236/77, loss: 4.5448163632499927e-07 2023-01-22 08:54:42.873506: step: 240/77, loss: 7.256832645907707e-07 2023-01-22 08:54:44.161338: step: 244/77, loss: 0.009154018014669418 2023-01-22 08:54:45.456414: step: 248/77, loss: 9.619673801353201e-05 2023-01-22 08:54:46.753069: step: 252/77, loss: 0.00031977289472706616 2023-01-22 08:54:48.068062: step: 256/77, loss: 0.050523921847343445 2023-01-22 08:54:49.378862: step: 260/77, loss: 1.8461626041244017e-06 2023-01-22 08:54:50.668886: step: 264/77, loss: 9.738103835843503e-05 2023-01-22 08:54:51.933319: step: 268/77, loss: 1.4901155864777138e-08 2023-01-22 08:54:53.234945: step: 272/77, loss: 1.162021635536803e-05 2023-01-22 08:54:54.510439: step: 276/77, loss: 0.0044382489286363125 2023-01-22 08:54:55.757847: step: 280/77, loss: 2.4317955649166834e-06 2023-01-22 08:54:57.072121: step: 284/77, loss: 1.6182344779736013e-06 2023-01-22 08:54:58.397780: step: 288/77, loss: 3.710630699060857e-05 2023-01-22 08:54:59.729024: step: 292/77, loss: 2.1636074052366894e-06 2023-01-22 08:55:01.039177: step: 296/77, loss: 0.004715841729193926 2023-01-22 08:55:02.322871: step: 300/77, loss: 9.745041325004422e-07 2023-01-22 08:55:03.569063: step: 304/77, loss: 0.00031040236353874207 2023-01-22 08:55:04.842372: step: 308/77, loss: 2.852852048818022e-05 2023-01-22 08:55:06.081976: step: 312/77, loss: 0.0031233031768351793 2023-01-22 08:55:07.339446: step: 316/77, loss: 0.0001433442666893825 2023-01-22 08:55:08.653140: step: 320/77, loss: 0.006575982552021742 2023-01-22 08:55:09.957717: step: 324/77, loss: 7.856477168388665e-06 2023-01-22 08:55:11.242991: step: 328/77, loss: 0.00030488992342725396 2023-01-22 08:55:12.512899: step: 332/77, loss: 2.0770582978002494e-06 2023-01-22 08:55:13.757581: step: 336/77, loss: 1.2665948645462777e-07 2023-01-22 08:55:15.048561: step: 340/77, loss: 3.27336965710856e-06 2023-01-22 08:55:16.294042: step: 344/77, loss: 4.828689634450711e-05 2023-01-22 08:55:17.577532: step: 348/77, loss: 0.037822216749191284 2023-01-22 08:55:18.815654: step: 352/77, loss: 7.748373172944412e-05 2023-01-22 08:55:20.074072: step: 356/77, loss: 1.3292006769916043e-05 2023-01-22 08:55:21.393362: step: 360/77, loss: 0.00017053502961061895 2023-01-22 08:55:22.668052: step: 364/77, loss: 1.1916748917428777e-05 2023-01-22 08:55:23.978220: step: 368/77, loss: 0.019462842494249344 2023-01-22 08:55:25.272977: step: 372/77, loss: 4.768004146171734e-06 2023-01-22 08:55:26.545184: step: 376/77, loss: 1.578027513460256e-05 2023-01-22 08:55:27.825812: step: 380/77, loss: 0.011081244796514511 2023-01-22 08:55:29.124304: step: 384/77, loss: 0.052583497017621994 2023-01-22 08:55:30.414640: step: 388/77, loss: 3.352726878347312e-07 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 28} Test Chinese: {'template': {'p': 0.9743589743589743, 'r': 
0.6178861788617886, 'f1': 0.7562189054726368}, 'slot': {'p': 0.5609756097560976, 'r': 0.02090909090909091, 'f1': 0.040315512708150744}, 'combined': 0.030487352893725936, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.48717948717948717, 'r': 0.035916824196597356, 'f1': 0.06690140845070423}, 'combined': 0.04839676356008391, 'epoch': 28} Test Korean: {'template': {'p': 0.9743589743589743, 'r': 0.6178861788617886, 'f1': 0.7562189054726368}, 'slot': {'p': 0.5609756097560976, 'r': 0.02090909090909091, 'f1': 0.040315512708150744}, 'combined': 0.030487352893725936, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.48717948717948717, 'r': 0.035916824196597356, 'f1': 0.06690140845070423}, 'combined': 0.04839676356008391, 'epoch': 28} Test Russian: {'template': {'p': 0.9743589743589743, 'r': 0.6178861788617886, 'f1': 0.7562189054726368}, 'slot': {'p': 0.5609756097560976, 'r': 0.02090909090909091, 'f1': 0.040315512708150744}, 'combined': 0.030487352893725936, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 28} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 
'epoch': 5} ****************************** Epoch: 29 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:57:08.570007: step: 4/77, loss: 3.144855872960761e-05 2023-01-22 08:57:09.845615: step: 8/77, loss: 5.751747949034325e-07 2023-01-22 08:57:11.123139: step: 12/77, loss: 8.017163054319099e-05 2023-01-22 08:57:12.402249: step: 16/77, loss: 0.022102270275354385 2023-01-22 08:57:13.659973: step: 20/77, loss: 2.7567057259147987e-07 2023-01-22 08:57:14.928175: step: 24/77, loss: 4.811163307749666e-06 2023-01-22 08:57:16.179808: step: 28/77, loss: 8.746854973651352e-07 2023-01-22 08:57:17.492487: step: 32/77, loss: 6.541539505633409e-07 2023-01-22 08:57:18.796564: step: 36/77, loss: 7.522347732447088e-05 2023-01-22 08:57:20.065519: step: 40/77, loss: 0.0008213004330173135 2023-01-22 08:57:21.333708: step: 44/77, loss: 2.7119872925140953e-07 2023-01-22 08:57:22.630808: step: 48/77, loss: 3.8976580981398e-05 2023-01-22 08:57:23.867477: step: 52/77, loss: 0.024472558870911598 2023-01-22 08:57:25.143472: step: 56/77, loss: 0.002236069878563285 2023-01-22 08:57:26.455565: step: 60/77, loss: 0.0008119684061966836 2023-01-22 08:57:27.730387: step: 64/77, loss: 0.00016473043069709092 2023-01-22 08:57:29.036513: step: 68/77, loss: 1.0659737199603114e-05 2023-01-22 08:57:30.308768: step: 72/77, loss: 6.331157783279195e-05 2023-01-22 08:57:31.650761: step: 76/77, loss: 8.398948921239935e-06 2023-01-22 08:57:32.961883: step: 80/77, loss: 1.9222457581236085e-07 2023-01-22 08:57:34.232432: step: 84/77, loss: 0.00012843337026424706 2023-01-22 08:57:35.563091: step: 88/77, loss: 2.4824919819366187e-05 2023-01-22 08:57:36.885953: step: 92/77, loss: 7.043376172077842e-06 2023-01-22 08:57:38.250172: step: 96/77, loss: 0.002021679887548089 2023-01-22 08:57:39.564795: step: 100/77, loss: 4.277397238183767e-06 2023-01-22 08:57:40.838448: step: 104/77, loss: 0.01951354369521141 2023-01-22 08:57:42.137515: step: 108/77, loss: 0.0237126424908638 2023-01-22 08:57:43.396720: step: 112/77, loss: 2.9460461519192904e-05 2023-01-22 08:57:44.707864: step: 116/77, loss: 4.008111318398733e-06 2023-01-22 08:57:46.044425: step: 120/77, loss: 0.00021640595514327288 2023-01-22 08:57:47.374526: step: 124/77, loss: 0.013219022192060947 2023-01-22 08:57:48.685543: step: 128/77, loss: 1.1595335308811627e-05 2023-01-22 08:57:49.979990: step: 132/77, loss: 0.00018398706743028015 2023-01-22 08:57:51.322424: step: 136/77, loss: 4.208624886814505e-05 2023-01-22 08:57:52.681441: step: 140/77, loss: 7.261715381901013e-06 2023-01-22 08:57:54.038523: step: 144/77, loss: 0.02468251623213291 2023-01-22 08:57:55.334149: step: 148/77, loss: 4.6508765080943704e-05 2023-01-22 08:57:56.617106: step: 152/77, loss: 1.3247890819911845e-05 2023-01-22 08:57:57.904955: step: 156/77, loss: 2.968638546008151e-05 2023-01-22 08:57:59.199415: step: 160/77, loss: 0.0024238319601863623 2023-01-22 08:58:00.483720: step: 164/77, loss: 8.205627636925783e-06 2023-01-22 08:58:01.764687: step: 168/77, loss: 0.0014939033426344395 2023-01-22 08:58:03.051005: step: 172/77, loss: 9.685743407317204e-08 2023-01-22 08:58:04.352245: step: 176/77, loss: 1.5675155964345322e-06 2023-01-22 08:58:05.635184: step: 180/77, loss: 2.3959746613400057e-06 2023-01-22 08:58:06.878798: step: 184/77, loss: 0.00026503464323468506 2023-01-22 08:58:08.180933: step: 188/77, loss: 0.00022445156355388463 
2023-01-22 08:58:09.516821: step: 192/77, loss: 7.375857649094542e-07 2023-01-22 08:58:10.827129: step: 196/77, loss: 2.0861621763401672e-08 2023-01-22 08:58:12.115694: step: 200/77, loss: 0.0003767440211959183 2023-01-22 08:58:13.338135: step: 204/77, loss: 7.617295341333374e-05 2023-01-22 08:58:14.666906: step: 208/77, loss: 0.00012021034490317106 2023-01-22 08:58:15.977528: step: 212/77, loss: 0.005869630724191666 2023-01-22 08:58:17.254250: step: 216/77, loss: 0.00020114541985094547 2023-01-22 08:58:18.546139: step: 220/77, loss: 0.004399897996336222 2023-01-22 08:58:19.847035: step: 224/77, loss: 0.0016715697711333632 2023-01-22 08:58:21.146112: step: 228/77, loss: 0.00018241455836687237 2023-01-22 08:58:22.410040: step: 232/77, loss: 0.0009146218653768301 2023-01-22 08:58:23.710162: step: 236/77, loss: 0.0018498735735192895 2023-01-22 08:58:24.917572: step: 240/77, loss: 0.00022577299387194216 2023-01-22 08:58:26.239081: step: 244/77, loss: 0.05294421315193176 2023-01-22 08:58:27.580575: step: 248/77, loss: 0.0485006682574749 2023-01-22 08:58:28.848035: step: 252/77, loss: 5.894068635825533e-06 2023-01-22 08:58:30.094808: step: 256/77, loss: 1.2963988638148294e-07 2023-01-22 08:58:31.414051: step: 260/77, loss: 5.3644139796915624e-08 2023-01-22 08:58:32.642233: step: 264/77, loss: 0.002035744721069932 2023-01-22 08:58:33.940568: step: 268/77, loss: 0.017132868990302086 2023-01-22 08:58:35.209421: step: 272/77, loss: 3.955773718189448e-05 2023-01-22 08:58:36.488845: step: 276/77, loss: 3.992898200522177e-06 2023-01-22 08:58:37.761690: step: 280/77, loss: 0.0005716330488212407 2023-01-22 08:58:39.019771: step: 284/77, loss: 4.1489121940685436e-05 2023-01-22 08:58:40.308083: step: 288/77, loss: 5.015826172893867e-05 2023-01-22 08:58:41.643766: step: 292/77, loss: 9.168793621938676e-05 2023-01-22 08:58:42.933809: step: 296/77, loss: 1.1026848767414776e-07 2023-01-22 08:58:44.245177: step: 300/77, loss: 0.002865537302568555 2023-01-22 08:58:45.490568: step: 304/77, loss: 0.0002901027328334749 2023-01-22 08:58:46.752038: step: 308/77, loss: 0.0021394614595919847 2023-01-22 08:58:48.044567: step: 312/77, loss: 4.060235369252041e-06 2023-01-22 08:58:49.380721: step: 316/77, loss: 0.0009134943829849362 2023-01-22 08:58:50.695270: step: 320/77, loss: 1.3618976026918972e-06 2023-01-22 08:58:51.998339: step: 324/77, loss: 0.00023269267694558948 2023-01-22 08:58:53.287694: step: 328/77, loss: 9.249313734471798e-05 2023-01-22 08:58:54.597454: step: 332/77, loss: 7.471351273125038e-05 2023-01-22 08:58:55.892156: step: 336/77, loss: 0.00022001775505486876 2023-01-22 08:58:57.164240: step: 340/77, loss: 0.00011256830475758761 2023-01-22 08:58:58.449742: step: 344/77, loss: 4.7683684556432127e-08 2023-01-22 08:58:59.712790: step: 348/77, loss: 9.553597010381054e-06 2023-01-22 08:59:01.009333: step: 352/77, loss: 0.002149000996723771 2023-01-22 08:59:02.342711: step: 356/77, loss: 0.00040330199408344924 2023-01-22 08:59:03.648237: step: 360/77, loss: 4.79812626963394e-07 2023-01-22 08:59:04.990332: step: 364/77, loss: 1.3858060299298813e-07 2023-01-22 08:59:06.291528: step: 368/77, loss: 0.023484427481889725 2023-01-22 08:59:07.582394: step: 372/77, loss: 0.02371416613459587 2023-01-22 08:59:08.892694: step: 376/77, loss: 9.029912462210632e-07 2023-01-22 08:59:10.189678: step: 380/77, loss: 1.163185606856132e-05 2023-01-22 08:59:11.461104: step: 384/77, loss: 6.451639819715638e-06 2023-01-22 08:59:12.739359: step: 388/77, loss: 5.81145016553819e-08 ================================================== 
Loss: 0.003
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 29}
Test Chinese: {'template': {'p': 0.9605263157894737, 'r': 0.5934959349593496, 'f1': 0.7336683417085428}, 'slot': {'p': 0.575, 'r': 0.02090909090909091, 'f1': 0.04035087719298246}, 'combined': 0.029604161156660497, 'epoch': 29}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 29}
Test Korean: {'template': {'p': 0.961038961038961, 'r': 0.6016260162601627, 'f1': 0.74}, 'slot': {'p': 0.5641025641025641, 'r': 0.02, 'f1': 0.038630377524143986}, 'combined': 0.028586479367866548, 'epoch': 29}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 29}
Test Russian: {'template': {'p': 0.961038961038961, 'r': 0.6016260162601627, 'f1': 0.74}, 'slot': {'p': 0.575, 'r': 0.02090909090909091, 'f1': 0.04035087719298246}, 'combined': 0.029859649122807017, 'epoch': 29}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 29}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29}
Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5}
Test for Chinese: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.5, 'r': 0.02090909090909091, 'f1': 0.04013961605584642}, 'combined': 0.031113003832761343, 'epoch': 5}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5}
Test for Korean: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5}
Test for Russian: {'template': {'p': 0.9418604651162791, 'r': 0.6585365853658537, 'f1': 0.7751196172248804}, 'slot': {'p': 0.48936170212765956, 'r': 0.02090909090909091, 'f1': 0.040104620749782036}, 'combined': 0.031085878284520047, 'epoch': 5}
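Note on how the score dicts above compose (inferred from the logged values, not taken from the evaluation code): each f1 is the harmonic mean 2pr/(p+r), and each 'combined' value equals the product of the template f1 and the slot f1 (e.g. Dev Chinese at epoch 29: 0.7096774193548387 × 0.06371681415929203 ≈ 0.04521838424207822). A minimal sketch that reproduces those numbers:

def f1(p, r):
    # Harmonic mean of precision and recall; 0.0 when undefined.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template_f1, slot_f1):
    # 'combined' in this log equals the product of the two F1 scores.
    return template_f1 * slot_f1

# Dev Chinese, epoch 29:
template_f1 = f1(1.0, 0.55)                  # 0.7096774193548387
slot_f1 = f1(0.5, 0.034026465028355386)      # 0.06371681415929203
print(combined_score(template_f1, slot_f1))  # 0.04521838424207822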
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5}
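Note that the "Current best result" block still reports the epoch-5 scores at epoch 29, consistent with model selection that keeps, per language, the epoch with the highest dev 'combined' score. A sketch of that bookkeeping (hypothetical; all names are illustrative):

best = {}  # language -> {'dev': ..., 'test': ..., 'epoch': ...}

def update_best(language, dev_scores, test_scores, epoch):
    # Keep whichever epoch has the highest dev 'combined' score for this language.
    prev = best.get(language)
    if prev is None or dev_scores['combined'] > prev['dev']['combined']:
        best[language] = {'dev': dev_scores, 'test': test_scores, 'epoch': epoch}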