Command that produces this log:
python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
----------------------------------------------------------------------------------------------------
> trainable params:
>>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024])
>>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024])
>>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024])
>>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024])
>>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.1 through xlmr.encoder.layer.23: same 16 parameter names and shapes as layer 0, repeated per layer
>>> xlmr.pooler.dense.weight: torch.Size([1024, 1024])
>>> xlmr.pooler.dense.bias: torch.Size([1024])
>>> trans_rep.weight: torch.Size([1024, 2048])
>>> trans_rep.bias: torch.Size([1024])
For each of the eight event templates T in {Corruplate, Cybercrimeplate, Disasterplate, Displacementplate, Epidemiplate, Etiplate, Protestplate, Terrorplate}, the following head parameters repeat with identical shapes:
>>> hidden_ffns.T.layers.0.weight: torch.Size([768, 1024])
>>> hidden_ffns.T.layers.0.bias: torch.Size([768])
>>> template_classifiers.T.layers.0.weight: torch.Size([450, 768])
>>> template_classifiers.T.layers.0.bias: torch.Size([450])
>>> template_classifiers.T.layers.1.weight: torch.Size([2, 450])
>>> template_classifiers.T.layers.1.bias: torch.Size([2])
>>> type_classifiers.T.layers.0.weight: torch.Size([450, 768])
>>> type_classifiers.T.layers.0.bias: torch.Size([450])
>>> type_classifiers.T.layers.1.weight: torch.Size([6, 450])
>>> type_classifiers.T.layers.1.bias: torch.Size([6])
>>> completion_classifiers.T.layers.0.weight: torch.Size([450, 768])
>>> completion_classifiers.T.layers.0.bias: torch.Size([450])
>>> completion_classifiers.T.layers.1.weight: torch.Size([4, 450])
>>> completion_classifiers.T.layers.1.bias: torch.Size([4])
>>> overtime_classifiers.T.layers.0.weight: torch.Size([450, 768])
>>> overtime_classifiers.T.layers.0.bias: torch.Size([450])
>>> overtime_classifiers.T.layers.1.weight: torch.Size([2, 450])
>>> overtime_classifiers.T.layers.1.bias: torch.Size([2])
>>> coordinated_classifiers.T.layers.0.weight: torch.Size([450, 768])
>>> coordinated_classifiers.T.layers.0.bias: torch.Size([450])
>>> coordinated_classifiers.T.layers.1.weight: torch.Size([2, 450])
>>> coordinated_classifiers.T.layers.1.bias: torch.Size([2])
n_trainable_params: 582185936, n_nontrainable_params: 0
----------------------------------------------------------------------------------------------------
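The listing and the totals line above are the kind of output produced by looping over a PyTorch model's named_parameters(). A minimal sketch of such a dump (an assumption, not the repo's actual code; only the XLM-R backbone is loaded here, while the real model also owns trans_rep and the per-template heads that are included in the logged total of 582185936):

    from transformers import XLMRobertaModel

    # Backbone named by --xlmr_model_name in the command above.
    model = XLMRobertaModel.from_pretrained("xlm-roberta-large")

    n_trainable, n_nontrainable = 0, 0
    print("> trainable params:")
    for name, param in model.named_parameters():
        if param.requires_grad:
            # param.shape prints as torch.Size([...]), matching the log lines;
            # the "xlmr." prefix mimics the backbone's attribute name in the full model.
            print(f">>> xlmr.{name}: {param.shape}")
            n_trainable += param.numel()
        else:
            n_nontrainable += param.numel()
    print(f"n_trainable_params: {n_trainable}, n_nontrainable_params: {n_nontrainable}")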
******************************
Epoch: 0
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
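Each epoch banner repeats the launch command. For reference, its flags map onto an argument parser along these lines (a sketch: the flag names come from the command itself, but the types, defaults, and comments are assumptions):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", default="template")
    parser.add_argument("--xlmr_model_name", default="xlm-roberta-large")
    parser.add_argument("--batch_size", type=int, default=10)
    parser.add_argument("--xlmr_learning_rate", type=float, default=2e-5)  # LR for the XLM-R encoder
    parser.add_argument("--event_hidden_num", type=int, default=450)       # hidden width of the event heads (the 450 in the shapes above)
    parser.add_argument("--accumulate_step", type=int, default=4)          # gradient-accumulation steps
    parser.add_argument("--max_epoch", type=int, default=30)
    parser.add_argument("--p1_data_weight", type=float, default=0.1)       # presumably a loss weight for phase-1 data
    parser.add_argument("--learning_rate", type=float, default=2e-4)       # presumably the LR for the non-encoder parameters
    args = parser.parse_args()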
2023-01-22 07:18:51.319740: step: 4/77, loss: 1.0444989204406738
2023-01-22 07:18:52.557467: step: 8/77, loss: 1.0512828826904297
2023-01-22 07:18:53.798909: step: 12/77, loss: 1.056790828704834
2023-01-22 07:18:55.121516: step: 16/77, loss: 1.0526840686798096
2023-01-22 07:18:56.392561: step: 20/77, loss: 1.0438315868377686
2023-01-22 07:18:57.649406: step: 24/77, loss: 1.0445082187652588
2023-01-22 07:18:58.914048: step: 28/77, loss: 1.0523055791854858
2023-01-22 07:19:00.218053: step: 32/77, loss: 1.043738603591919
2023-01-22 07:19:01.474970: step: 36/77, loss: 1.0327272415161133
2023-01-22 07:19:02.744347: step: 40/77, loss: 1.0247113704681396
2023-01-22 07:19:04.017422: step: 44/77, loss: 1.020169973373413
2023-01-22 07:19:05.308221: step: 48/77, loss: 1.012270212173462
2023-01-22 07:19:06.625858: step: 52/77, loss: 0.9989683628082275
2023-01-22 07:19:07.894616: step: 56/77, loss: 1.0067802667617798
2023-01-22 07:19:09.103493: step: 60/77, loss: 0.9830159544944763
2023-01-22 07:19:10.419726: step: 64/77, loss: 0.9617307782173157
2023-01-22 07:19:11.734863: step: 68/77, loss: 0.9592471718788147
2023-01-22 07:19:13.052966: step: 72/77, loss: 0.9552438855171204
2023-01-22 07:19:14.365131: step: 76/77, loss: 0.9411525726318359
2023-01-22 07:19:15.698456: step: 80/77, loss: 0.9311196804046631
2023-01-22 07:19:16.976273: step: 84/77, loss: 0.9200646877288818
2023-01-22 07:19:18.301651: step: 88/77, loss: 0.9144275188446045
2023-01-22 07:19:19.590390: step: 92/77, loss: 0.901231050491333
2023-01-22 07:19:20.873885: step: 96/77, loss: 0.8835911750793457
2023-01-22 07:19:22.162162: step: 100/77, loss: 0.8329275846481323
2023-01-22 07:19:23.448024: step: 104/77, loss: 0.8399503231048584
2023-01-22 07:19:24.667379: step: 108/77, loss: 0.8364051580429077
2023-01-22 07:19:25.922058: step: 112/77, loss: 0.8006849884986877
2023-01-22 07:19:27.210838: step: 116/77, loss: 0.7944232821464539
2023-01-22 07:19:28.512472: step: 120/77, loss: 0.7675794363021851
2023-01-22 07:19:29.788187: step: 124/77, loss: 0.7662374973297119
2023-01-22 07:19:31.132304: step: 128/77, loss: 0.7447980046272278
2023-01-22 07:19:32.446699: step: 132/77, loss: 0.6651491522789001
2023-01-22 07:19:33.785008: step: 136/77, loss: 0.6845536231994629
2023-01-22 07:19:35.114305: step: 140/77, loss: 0.6975764036178589
2023-01-22 07:19:36.425732: step: 144/77, loss: 0.632821798324585
2023-01-22 07:19:37.764099: step: 148/77, loss: 0.6046133041381836
2023-01-22 07:19:39.103152: step: 152/77, loss: 0.6029253005981445
2023-01-22 07:19:40.368599: step: 156/77, loss: 0.5973246097564697
2023-01-22 07:19:41.715723: step: 160/77, loss: 0.6111674308776855
2023-01-22 07:19:43.031010: step: 164/77, loss: 0.5706891417503357
2023-01-22 07:19:44.342959: step: 168/77, loss: 0.4798954129219055
2023-01-22 07:19:45.691788: step: 172/77, loss: 0.42601197957992554
2023-01-22 07:19:46.998898: step: 176/77, loss: 0.45769280195236206
2023-01-22 07:19:48.328620: step: 180/77, loss: 0.38394296169281006
2023-01-22 07:19:49.607456: step: 184/77, loss: 0.4356851577758789
2023-01-22 07:19:50.924722: step: 188/77, loss: 0.44299718737602234
2023-01-22 07:19:52.181807: step: 192/77, loss: 0.33653539419174194
2023-01-22 07:19:53.478644: step: 196/77, loss: 0.3349962830543518
2023-01-22 07:19:54.836234: step: 200/77, loss: 0.3130283057689667
2023-01-22 07:19:56.157843: step: 204/77, loss: 0.2645954191684723
2023-01-22 07:19:57.491218: step: 208/77, loss: 0.2651616930961609
2023-01-22 07:19:58.766662: step: 212/77, loss: 0.4297363758087158
2023-01-22 07:20:00.074840: step: 216/77, loss: 0.3081668019294739
2023-01-22 07:20:01.377547: step: 220/77, loss: 0.24094446003437042
2023-01-22 07:20:02.698424: step: 224/77, loss: 0.22433724999427795
2023-01-22 07:20:04.005514: step: 228/77, loss: 0.27115598320961
2023-01-22 07:20:05.373605: step: 232/77, loss: 0.1454138308763504
2023-01-22 07:20:06.592662: step: 236/77, loss: 0.19214697182178497
2023-01-22 07:20:07.908473: step: 240/77, loss: 0.30754104256629944
2023-01-22 07:20:09.195300: step: 244/77, loss: 0.16875210404396057
2023-01-22 07:20:10.502143: step: 248/77, loss: 0.18391621112823486
2023-01-22 07:20:11.802361: step: 252/77, loss: 0.18308106064796448
2023-01-22 07:20:13.113371: step: 256/77, loss: 0.287090539932251
2023-01-22 07:20:14.426386: step: 260/77, loss: 0.1613195836544037
2023-01-22 07:20:15.713227: step: 264/77, loss: 0.23789790272712708
2023-01-22 07:20:17.009599: step: 268/77, loss: 0.15599499642848969
2023-01-22 07:20:18.294077: step: 272/77, loss: 0.05770568549633026
2023-01-22 07:20:19.639621: step: 276/77, loss: 0.11479754745960236
2023-01-22 07:20:20.951337: step: 280/77, loss: 0.0677362009882927
2023-01-22 07:20:22.258398: step: 284/77, loss: 0.1197415217757225
2023-01-22 07:20:23.556529: step: 288/77, loss: 0.06359511613845825
2023-01-22 07:20:24.883550: step: 292/77, loss: 0.084564208984375
2023-01-22 07:20:26.192773: step: 296/77, loss: 0.390523225069046
2023-01-22 07:20:27.481586: step: 300/77, loss: 0.0934390053153038
2023-01-22 07:20:28.816552: step: 304/77, loss: 0.18686211109161377
2023-01-22 07:20:30.152494: step: 308/77, loss: 0.052013661712408066
2023-01-22 07:20:31.456851: step: 312/77, loss: 0.08927802741527557
2023-01-22 07:20:32.728283: step: 316/77, loss: 0.05229855701327324
2023-01-22 07:20:34.046163: step: 320/77, loss: 0.07221616804599762
2023-01-22 07:20:35.333346: step: 324/77, loss: 0.14579658210277557
2023-01-22 07:20:36.610395: step: 328/77, loss: 0.06311936676502228
2023-01-22 07:20:37.910689: step: 332/77, loss: 0.04793437197804451
2023-01-22 07:20:39.232174: step: 336/77, loss: 0.038324177265167236
2023-01-22 07:20:40.533865: step: 340/77, loss: 0.06017142906785011
2023-01-22 07:20:41.829547: step: 344/77, loss: 0.0927521139383316
2023-01-22 07:20:43.168918: step: 348/77, loss: 0.05052327364683151
2023-01-22 07:20:44.498382: step: 352/77, loss: 0.10013939440250397
2023-01-22 07:20:45.777189: step: 356/77, loss: 0.16297364234924316
2023-01-22 07:20:47.049304: step: 360/77, loss: 0.053793832659721375
2023-01-22 07:20:48.333901: step: 364/77, loss: 0.11682991683483124
2023-01-22 07:20:49.614797: step: 368/77, loss: 0.2339695543050766
2023-01-22 07:20:50.884315: step: 372/77, loss: 0.13662782311439514
2023-01-22 07:20:52.171824: step: 376/77, loss: 0.029554195702075958
2023-01-22 07:20:53.483760: step: 380/77, loss: 0.08953571319580078
2023-01-22 07:20:54.816991: step: 384/77, loss: 0.14427034556865692
2023-01-22 07:20:56.088258: step: 388/77, loss: 0.09134456515312195
==================================================
Loss: 0.481
--------------------
Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
New best chinese model...
New best korean model...
New best russian model...
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
--------------------
Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
--------------------
Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
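In the step log above, the counter advances by 4 per line, consistent with --accumulate_step 4: one line is printed per optimizer update, and each update accumulates gradients from 4 mini-batches of 10 examples, an effective batch size of 40. A self-contained sketch of that accumulation pattern with a toy model (illustrative only, not the repo's training loop, and the "/77" denominator is left aside):

    import datetime
    import torch

    model = torch.nn.Linear(8, 2)                      # toy stand-in for the real model
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-4)
    batches = [(torch.randn(10, 8), torch.randint(0, 2, (10,))) for _ in range(16)]

    accumulate_step = 4                                # --accumulate_step 4
    optimizer.zero_grad()
    for step, (x, y) in enumerate(batches, start=1):
        loss = torch.nn.functional.cross_entropy(model(x), y)
        (loss / accumulate_step).backward()            # scale so accumulated grads average out
        if step % accumulate_step == 0:                # one optimizer update per 4 mini-batches
            optimizer.step()
            optimizer.zero_grad()
            print(f"{datetime.datetime.now()}: step: {step}/{len(batches)}, loss: {loss.item()}")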
******************************
Epoch: 1
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-22 07:23:06.509517: step: 4/77, loss: 0.12441090494394302
2023-01-22 07:23:07.775591: step: 8/77, loss: 0.09033513069152832
2023-01-22 07:23:09.022383: step: 12/77, loss: 0.07786523550748825
2023-01-22 07:23:10.305066: step: 16/77, loss: 0.13140356540679932
2023-01-22 07:23:11.526828: step: 20/77, loss: 0.07487793266773224
2023-01-22 07:23:12.829738: step: 24/77, loss: 0.11832275986671448
2023-01-22 07:23:14.172170: step: 28/77, loss: 0.09228794276714325
2023-01-22 07:23:15.512202: step: 32/77, loss: 0.0581531897187233
2023-01-22 07:23:16.762320: step: 36/77, loss: 0.08141757547855377
2023-01-22 07:23:18.068323: step: 40/77, loss: 0.06293760240077972
2023-01-22 07:23:19.398911: step: 44/77, loss: 0.027853157371282578
2023-01-22 07:23:20.675479: step: 48/77, loss: 0.1434049904346466
2023-01-22 07:23:21.976622: step: 52/77, loss: 0.11293593049049377
2023-01-22 07:23:23.241012: step: 56/77, loss: 0.04692786931991577
2023-01-22 07:23:24.531623: step: 60/77, loss: 0.053145136684179306
2023-01-22 07:23:25.801544: step: 64/77, loss: 0.05567377060651779
2023-01-22 07:23:27.084729: step: 68/77, loss: 0.03823421150445938
2023-01-22 07:23:28.381106: step: 72/77, loss: 0.1519671231508255
2023-01-22 07:23:29.636965: step: 76/77, loss: 0.13052865862846375
2023-01-22 07:23:30.948964: step: 80/77, loss: 0.15403302013874054
2023-01-22 07:23:32.250172: step: 84/77, loss: 0.15508079528808594
2023-01-22 07:23:33.577586: step: 88/77, loss: 0.06106165423989296
2023-01-22 07:23:34.876323: step: 92/77, loss: 0.09424936771392822
2023-01-22 07:23:36.180158: step: 96/77, loss: 0.18639744818210602
2023-01-22 07:23:37.486486: step: 100/77, loss: 0.15375545620918274
2023-01-22 07:23:38.807464: step: 104/77, loss: 0.08081331104040146
2023-01-22 07:23:40.111634: step: 108/77, loss: 0.12643709778785706
2023-01-22 07:23:41.391584: step: 112/77, loss: 0.06533145904541016
2023-01-22 07:23:42.709884: step: 116/77, loss: 0.0676630437374115
2023-01-22 07:23:44.053950: step: 120/77, loss: 0.15203221142292023
2023-01-22 07:23:45.327225: step: 124/77, loss: 0.12343025207519531
2023-01-22 07:23:46.679330: step: 128/77, loss: 0.06405288726091385
2023-01-22 07:23:47.965055: step: 132/77, loss: 0.026405390352010727
2023-01-22 07:23:49.267961: step: 136/77, loss: 0.12348470091819763
2023-01-22 07:23:50.559825: step: 140/77, loss: 0.05019168555736542
2023-01-22 07:23:51.883336: step: 144/77, loss: 0.06446860730648041
2023-01-22 07:23:53.138024: step: 148/77, loss: 0.06395231187343597
2023-01-22 07:23:54.434094: step: 152/77, loss: 0.09394072741270065
2023-01-22 07:23:55.769206: step: 156/77, loss: 0.08700048923492432
2023-01-22 07:23:57.072223: step: 160/77, loss: 0.16286557912826538
2023-01-22 07:23:58.364458: step: 164/77, loss: 0.16823582351207733
2023-01-22 07:23:59.685158: step: 168/77, loss: 0.14089858531951904
2023-01-22 07:24:00.974279: step: 172/77, loss: 0.1266268938779831
2023-01-22 07:24:02.274923: step: 176/77, loss: 0.09754317998886108
2023-01-22 07:24:03.572454: step: 180/77, loss: 0.04306968301534653
2023-01-22 07:24:04.864408: step: 184/77, loss: 0.17184460163116455
2023-01-22 07:24:06.188764: step: 188/77, loss: 0.17892251908779144
2023-01-22 07:24:07.456295: step: 192/77, loss: 0.11327420175075531
2023-01-22 07:24:08.752508: step: 196/77, loss: 0.08435102552175522
2023-01-22 07:24:10.040028: step: 200/77, loss: 0.11456001549959183
2023-01-22 07:24:11.331775: step: 204/77, loss: 0.09126681089401245
2023-01-22 07:24:12.587354: step: 208/77, loss: 0.12354177236557007
2023-01-22 07:24:13.854991: step: 212/77, loss: 0.08257673680782318
2023-01-22 07:24:15.129858: step: 216/77, loss: 0.1086714044213295
2023-01-22 07:24:16.417396: step: 220/77, loss: 0.07348623871803284
2023-01-22 07:24:17.688455: step: 224/77, loss: 0.16840782761573792
2023-01-22 07:24:19.020110: step: 228/77, loss: 0.06676463782787323
2023-01-22 07:24:20.264514: step: 232/77, loss: 0.07722212374210358
2023-01-22 07:24:21.520258: step: 236/77, loss: 0.12126356363296509
2023-01-22 07:24:22.779374: step: 240/77, loss: 0.1517094075679779
2023-01-22 07:24:24.092023: step: 244/77, loss: 0.08583660423755646
2023-01-22 07:24:25.404588: step: 248/77, loss: 0.06803622841835022
2023-01-22 07:24:26.698537: step: 252/77, loss: 0.1995316445827484
2023-01-22 07:24:27.958879: step: 256/77, loss: 0.08445043861865997
2023-01-22 07:24:29.281133: step: 260/77, loss: 0.09522661566734314
2023-01-22 07:24:30.556804: step: 264/77, loss: 0.11049655079841614
2023-01-22 07:24:31.813480: step: 268/77, loss: 0.11643015593290329
2023-01-22 07:24:33.118708: step: 272/77, loss: 0.07346449792385101
2023-01-22 07:24:34.435414: step: 276/77, loss: 0.22751890122890472
2023-01-22 07:24:35.705929: step: 280/77, loss: 0.10608585178852081
2023-01-22 07:24:37.021391: step: 284/77, loss: 0.1218462884426117
2023-01-22 07:24:38.306349: step: 288/77, loss: 0.30423903465270996
2023-01-22 07:24:39.634692: step: 292/77, loss: 0.12431395053863525
2023-01-22 07:24:40.914163: step: 296/77, loss: 0.0865350216627121
2023-01-22 07:24:42.174511: step: 300/77, loss: 0.28666821122169495
2023-01-22 07:24:43.458836: step: 304/77, loss: 0.1154976487159729
2023-01-22 07:24:44.747213: step: 308/77, loss: 0.05872436612844467
2023-01-22 07:24:46.002570: step: 312/77, loss: 0.0811275988817215
2023-01-22 07:24:47.286475: step: 316/77, loss: 0.07434535026550293
2023-01-22 07:24:48.584736: step: 320/77, loss: 0.0865451991558075
2023-01-22 07:24:49.880253: step: 324/77, loss: 0.08560467511415482
2023-01-22 07:24:51.211962: step: 328/77, loss: 0.26304253935813904
2023-01-22 07:24:52.494456: step: 332/77, loss: 0.11373617500066757
2023-01-22 07:24:53.765181: step: 336/77, loss: 0.15586160123348236
2023-01-22 07:24:55.075416: step: 340/77, loss: 0.16020292043685913
2023-01-22 07:24:56.364309: step: 344/77, loss: 0.14308831095695496
2023-01-22 07:24:57.647924: step: 348/77, loss: 0.2223663330078125
2023-01-22 07:24:58.948055: step: 352/77, loss: 0.19795863330364227
2023-01-22 07:25:00.227049: step: 356/77, loss: 0.17458972334861755
2023-01-22 07:25:01.519390: step: 360/77, loss: 0.16533160209655762
2023-01-22 07:25:02.830099: step: 364/77, loss: 0.09042729437351227
2023-01-22 07:25:04.151207: step: 368/77, loss: 0.14386004209518433
2023-01-22 07:25:05.448144: step: 372/77, loss: 0.10231050848960876
2023-01-22 07:25:06.753531: step: 376/77, loss: 0.12333185970783234
2023-01-22 07:25:08.023021: step: 380/77, loss: 0.031893059611320496
2023-01-22 07:25:09.325388: step: 384/77, loss: 0.18950426578521729
2023-01-22 07:25:10.624319: step: 388/77, loss: 0.10373860597610474
==================================================
Loss: 0.115
--------------------
Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
--------------------
Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
--------------------
Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0}
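Evaluation reports one {'template', 'slot', 'combined', 'epoch'} dict per language and split. Epoch 0 printed "New best ... model" for all three languages, while epoch 1, with identical all-zero scores, did not, so the checkpoint criterion appears to be a strict improvement in the dev 'combined' score. A hypothetical sketch of that bookkeeping (the function name, save path, and criterion are assumptions, not the repo's code):

    import torch

    best = {}  # language -> {'dev': scores, 'test': scores}

    def maybe_save_best(language, dev_scores, test_scores, model):
        # Strict '>' matches the log: epoch 1's scores equal epoch 0's and
        # triggered no "New best" line.
        if language not in best or dev_scores['combined'] > best[language]['dev']['combined']:
            best[language] = {'dev': dev_scores, 'test': test_scores}
            torch.save(model.state_dict(), f"best_{language}.pt")  # hypothetical filename
            print(f"New best {language} model...")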
0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 2 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:26:53.780073: step: 4/77, loss: 0.1142747551202774 2023-01-22 07:26:55.092774: step: 8/77, loss: 0.05736310034990311 2023-01-22 07:26:56.400998: step: 12/77, loss: 0.07951190322637558 2023-01-22 07:26:57.682973: step: 16/77, loss: 0.26192837953567505 2023-01-22 07:26:58.984325: step: 20/77, loss: 0.059663981199264526 2023-01-22 07:27:00.267205: step: 24/77, loss: 0.13435494899749756 2023-01-22 07:27:01.555591: step: 28/77, loss: 0.10370631515979767 2023-01-22 07:27:02.836437: step: 32/77, loss: 0.08149582147598267 2023-01-22 07:27:04.181126: step: 36/77, loss: 0.05023789405822754 2023-01-22 07:27:05.504695: step: 40/77, loss: 0.1312832534313202 2023-01-22 07:27:06.833897: step: 44/77, loss: 0.03221912682056427 2023-01-22 07:27:08.137620: step: 48/77, loss: 0.11100511252880096 2023-01-22 07:27:09.429851: step: 52/77, loss: 0.11007451266050339 2023-01-22 07:27:10.695813: step: 56/77, loss: 0.032753244042396545 2023-01-22 07:27:11.937901: step: 60/77, loss: 0.12694334983825684 2023-01-22 07:27:13.229991: step: 64/77, loss: 0.07343435287475586 2023-01-22 07:27:14.525963: step: 68/77, loss: 0.06336362659931183 2023-01-22 07:27:15.822315: step: 72/77, loss: 0.041768234223127365 2023-01-22 07:27:17.081544: step: 76/77, loss: 0.056367091834545135 2023-01-22 07:27:18.395723: step: 80/77, loss: 0.08505905419588089 2023-01-22 07:27:19.683699: step: 84/77, loss: 0.07694367319345474 2023-01-22 07:27:21.000744: step: 88/77, loss: 0.08983577787876129 2023-01-22 07:27:22.276248: step: 92/77, loss: 0.053283900022506714 2023-01-22 07:27:23.584601: step: 96/77, loss: 0.10469195246696472 2023-01-22 07:27:24.851810: step: 100/77, loss: 0.06275144964456558 2023-01-22 07:27:26.129804: step: 104/77, loss: 0.06701089441776276 2023-01-22 07:27:27.406392: step: 108/77, loss: 0.04644807428121567 2023-01-22 07:27:28.722147: step: 112/77, loss: 0.0461655892431736 2023-01-22 07:27:30.008995: step: 116/77, loss: 0.05462607741355896 2023-01-22 07:27:31.315302: step: 120/77, loss: 0.04734814912080765 2023-01-22 07:27:32.590237: step: 124/77, loss: 0.08012720942497253 2023-01-22 07:27:33.905183: step: 128/77, loss: 0.05201762914657593 2023-01-22 07:27:35.212336: step: 132/77, loss: 0.10686588287353516 2023-01-22 07:27:36.542656: step: 136/77, loss: 0.12117096781730652 2023-01-22 07:27:37.818775: step: 140/77, loss: 0.03733343631029129 2023-01-22 07:27:39.095057: step: 
144/77, loss: 0.06947799026966095 2023-01-22 07:27:40.355616: step: 148/77, loss: 0.15459129214286804 2023-01-22 07:27:41.627675: step: 152/77, loss: 0.10610118508338928 2023-01-22 07:27:42.913976: step: 156/77, loss: 0.04381667077541351 2023-01-22 07:27:44.218925: step: 160/77, loss: 0.06606587767601013 2023-01-22 07:27:45.520383: step: 164/77, loss: 0.03466331586241722 2023-01-22 07:27:46.769588: step: 168/77, loss: 0.05533977597951889 2023-01-22 07:27:48.094913: step: 172/77, loss: 0.06665311753749847 2023-01-22 07:27:49.395152: step: 176/77, loss: 0.0230097696185112 2023-01-22 07:27:50.639639: step: 180/77, loss: 0.02612961269915104 2023-01-22 07:27:51.924304: step: 184/77, loss: 0.013719271868467331 2023-01-22 07:27:53.251025: step: 188/77, loss: 0.09981541335582733 2023-01-22 07:27:54.552958: step: 192/77, loss: 0.02909611165523529 2023-01-22 07:27:55.851989: step: 196/77, loss: 0.025418300181627274 2023-01-22 07:27:57.177172: step: 200/77, loss: 0.04251161217689514 2023-01-22 07:27:58.503509: step: 204/77, loss: 0.01297570951282978 2023-01-22 07:27:59.817247: step: 208/77, loss: 0.2978173494338989 2023-01-22 07:28:01.084186: step: 212/77, loss: 0.014417173340916634 2023-01-22 07:28:02.385542: step: 216/77, loss: 0.052583567798137665 2023-01-22 07:28:03.672844: step: 220/77, loss: 0.028254462406039238 2023-01-22 07:28:04.939941: step: 224/77, loss: 0.10017166286706924 2023-01-22 07:28:06.287076: step: 228/77, loss: 0.04614071175456047 2023-01-22 07:28:07.563763: step: 232/77, loss: 0.03228935971856117 2023-01-22 07:28:08.837716: step: 236/77, loss: 0.03745909035205841 2023-01-22 07:28:10.144340: step: 240/77, loss: 0.08055315166711807 2023-01-22 07:28:11.447793: step: 244/77, loss: 0.011832380667328835 2023-01-22 07:28:12.765636: step: 248/77, loss: 0.11246861517429352 2023-01-22 07:28:14.063049: step: 252/77, loss: 0.006759069859981537 2023-01-22 07:28:15.406203: step: 256/77, loss: 0.07749151438474655 2023-01-22 07:28:16.784209: step: 260/77, loss: 0.09028497338294983 2023-01-22 07:28:18.057567: step: 264/77, loss: 0.0039024720899760723 2023-01-22 07:28:19.338408: step: 268/77, loss: 0.08940325677394867 2023-01-22 07:28:20.617331: step: 272/77, loss: 0.0029816448222845793 2023-01-22 07:28:21.909893: step: 276/77, loss: 0.24304865300655365 2023-01-22 07:28:23.197617: step: 280/77, loss: 0.021988455206155777 2023-01-22 07:28:24.501533: step: 284/77, loss: 0.03558812662959099 2023-01-22 07:28:25.723412: step: 288/77, loss: 0.008235489949584007 2023-01-22 07:28:26.994197: step: 292/77, loss: 0.03035302273929119 2023-01-22 07:28:28.306792: step: 296/77, loss: 0.025868304073810577 2023-01-22 07:28:29.624085: step: 300/77, loss: 0.06794444471597672 2023-01-22 07:28:30.931339: step: 304/77, loss: 0.09457609057426453 2023-01-22 07:28:32.262140: step: 308/77, loss: 0.17274029552936554 2023-01-22 07:28:33.583760: step: 312/77, loss: 0.02986432984471321 2023-01-22 07:28:34.930960: step: 316/77, loss: 0.006221502088010311 2023-01-22 07:28:36.232237: step: 320/77, loss: 0.04203524440526962 2023-01-22 07:28:37.519886: step: 324/77, loss: 0.0495450533926487 2023-01-22 07:28:38.873292: step: 328/77, loss: 0.11815431714057922 2023-01-22 07:28:40.150925: step: 332/77, loss: 0.07679884135723114 2023-01-22 07:28:41.434181: step: 336/77, loss: 0.02332843467593193 2023-01-22 07:28:42.744712: step: 340/77, loss: 0.035125378519296646 2023-01-22 07:28:44.029995: step: 344/77, loss: 0.18532869219779968 2023-01-22 07:28:45.360002: step: 348/77, loss: 0.017119672149419785 2023-01-22 07:28:46.659892: step: 
352/77, loss: 0.04413990676403046 2023-01-22 07:28:47.933879: step: 356/77, loss: 0.024440914392471313 2023-01-22 07:28:49.247618: step: 360/77, loss: 0.02238747477531433 2023-01-22 07:28:50.576217: step: 364/77, loss: 0.12076471745967865 2023-01-22 07:28:51.852459: step: 368/77, loss: 0.05480184406042099 2023-01-22 07:28:53.151030: step: 372/77, loss: 0.03360671550035477 2023-01-22 07:28:54.462314: step: 376/77, loss: 0.04755230247974396 2023-01-22 07:28:55.744254: step: 380/77, loss: 0.08242542296648026 2023-01-22 07:28:57.040671: step: 384/77, loss: 0.03073291853070259 2023-01-22 07:28:58.332731: step: 388/77, loss: 0.023550812155008316 ================================================== Loss: 0.068 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} New best chinese model... New best korean model... New best russian model... 
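The epoch-2 scores above are the first non-zero evaluations of the run, and the logged numbers are internally consistent: each 'f1' is the harmonic mean of its 'p' and 'r', and 'combined' equals template F1 times slot F1 (e.g., Dev Chinese: 0.7368421052631579 * 0.07029876977152899 = 0.05179909351586346). A minimal sketch that reproduces this arithmetic; the product rule is inferred from the logged values, not read out of train.py:

```python
# Reproduces the evaluation arithmetic seen in the eval dicts above.
# Assumption (inferred from the logged numbers, not from train.py):
# combined = template F1 * slot F1.

def f1(p, r):
    """Harmonic mean of precision and recall; 0.0 when both are zero."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

# Dev Chinese, epoch 2, exactly as logged:
template_f1 = f1(p=1.0, r=0.5833333333333334)   # -> 0.7368421052631579
slot_f1 = f1(p=0.5, r=0.03780718336483932)      # -> 0.07029876977152899
combined = template_f1 * slot_f1                # -> 0.05179909351586346
print(template_f1, slot_f1, combined)
```

The product form also explains why 'combined' is 0.0 in every epoch-1 entry: if either the template or the slot F1 is zero, the combined score collapses to zero.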
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:31:06.331116: step: 4/77, loss: 0.10117167234420776 2023-01-22 07:31:07.672846: step: 8/77, loss: 0.035268642008304596 2023-01-22 07:31:08.967517: step: 12/77, loss: 0.020470503717660904 2023-01-22 07:31:10.210031: step: 16/77, loss: 0.0317903496325016 2023-01-22 07:31:11.533064: step: 20/77, loss: 0.03285245597362518 2023-01-22 07:31:12.808574: step: 24/77, loss: 0.013518502935767174 2023-01-22 07:31:14.096332: step: 28/77, loss: 0.04890953376889229 2023-01-22 07:31:15.357352: step: 32/77, loss: 0.007277567870914936 2023-01-22 07:31:16.623084: step: 36/77, loss: 0.035050973296165466 2023-01-22 07:31:17.900683: step: 40/77, loss: 0.005168437957763672 2023-01-22 07:31:19.143656: step: 44/77, loss: 0.09427545219659805 2023-01-22 07:31:20.470776: step: 48/77, loss: 0.007487798575311899 2023-01-22 07:31:21.756726: step: 52/77, loss: 0.13489431142807007 2023-01-22 07:31:23.069298: step: 56/77, loss: 0.018846577033400536 2023-01-22 07:31:24.332401: step: 60/77, loss: 0.06260690093040466 2023-01-22 07:31:25.630029: step: 64/77, loss: 0.03510448336601257 2023-01-22 07:31:26.915517: step: 68/77, loss: 0.09224671125411987 2023-01-22 07:31:28.182594: step: 72/77, loss: 0.006170531734824181 2023-01-22 07:31:29.517572: step: 76/77, loss: 0.04142601788043976 2023-01-22 07:31:30.851154: step: 80/77, 
loss: 0.00428326241672039 2023-01-22 07:31:32.171207: step: 84/77, loss: 0.04080601781606674 2023-01-22 07:31:33.445784: step: 88/77, loss: 0.014709645882248878 2023-01-22 07:31:34.766800: step: 92/77, loss: 0.02892473340034485 2023-01-22 07:31:36.077970: step: 96/77, loss: 0.027689656242728233 2023-01-22 07:31:37.371937: step: 100/77, loss: 0.029068466275930405 2023-01-22 07:31:38.711466: step: 104/77, loss: 0.08021325618028641 2023-01-22 07:31:40.033854: step: 108/77, loss: 0.0033673420548439026 2023-01-22 07:31:41.293761: step: 112/77, loss: 0.06603340059518814 2023-01-22 07:31:42.612409: step: 116/77, loss: 0.03336016461253166 2023-01-22 07:31:43.926183: step: 120/77, loss: 0.044526346027851105 2023-01-22 07:31:45.190686: step: 124/77, loss: 0.007119000889360905 2023-01-22 07:31:46.484355: step: 128/77, loss: 0.0037483470514416695 2023-01-22 07:31:47.747952: step: 132/77, loss: 0.01118344534188509 2023-01-22 07:31:48.998541: step: 136/77, loss: 0.004519638139754534 2023-01-22 07:31:50.326369: step: 140/77, loss: 0.011617088690400124 2023-01-22 07:31:51.616059: step: 144/77, loss: 0.022709239274263382 2023-01-22 07:31:52.920561: step: 148/77, loss: 0.009147140197455883 2023-01-22 07:31:54.204002: step: 152/77, loss: 0.07406341284513474 2023-01-22 07:31:55.469266: step: 156/77, loss: 0.017505250871181488 2023-01-22 07:31:56.804691: step: 160/77, loss: 0.045731909573078156 2023-01-22 07:31:58.083912: step: 164/77, loss: 0.026252295821905136 2023-01-22 07:31:59.421947: step: 168/77, loss: 0.10950756072998047 2023-01-22 07:32:00.728829: step: 172/77, loss: 0.037779469043016434 2023-01-22 07:32:02.045972: step: 176/77, loss: 0.12713013589382172 2023-01-22 07:32:03.370977: step: 180/77, loss: 0.07886983454227448 2023-01-22 07:32:04.686165: step: 184/77, loss: 0.04936029389500618 2023-01-22 07:32:05.952146: step: 188/77, loss: 0.033952511847019196 2023-01-22 07:32:07.249311: step: 192/77, loss: 0.09324926137924194 2023-01-22 07:32:08.567263: step: 196/77, loss: 0.07248904556035995 2023-01-22 07:32:09.877961: step: 200/77, loss: 0.3481594920158386 2023-01-22 07:32:11.152643: step: 204/77, loss: 0.0296194888651371 2023-01-22 07:32:12.437985: step: 208/77, loss: 0.009488312527537346 2023-01-22 07:32:13.753410: step: 212/77, loss: 0.01296425424516201 2023-01-22 07:32:15.123521: step: 216/77, loss: 0.010238438844680786 2023-01-22 07:32:16.387451: step: 220/77, loss: 0.09828265756368637 2023-01-22 07:32:17.697977: step: 224/77, loss: 0.047479234635829926 2023-01-22 07:32:18.982721: step: 228/77, loss: 0.015161667950451374 2023-01-22 07:32:20.288294: step: 232/77, loss: 0.07021617889404297 2023-01-22 07:32:21.632190: step: 236/77, loss: 0.04781772941350937 2023-01-22 07:32:22.941107: step: 240/77, loss: 0.010120526887476444 2023-01-22 07:32:24.237045: step: 244/77, loss: 0.11225083470344543 2023-01-22 07:32:25.574203: step: 248/77, loss: 0.04302593320608139 2023-01-22 07:32:26.927964: step: 252/77, loss: 0.007975922897458076 2023-01-22 07:32:28.216297: step: 256/77, loss: 0.03689086437225342 2023-01-22 07:32:29.501088: step: 260/77, loss: 0.022007912397384644 2023-01-22 07:32:30.807737: step: 264/77, loss: 0.06469503790140152 2023-01-22 07:32:32.084451: step: 268/77, loss: 0.010310238227248192 2023-01-22 07:32:33.363757: step: 272/77, loss: 0.060053229331970215 2023-01-22 07:32:34.668286: step: 276/77, loss: 0.015747323632240295 2023-01-22 07:32:35.945288: step: 280/77, loss: 0.011841855943202972 2023-01-22 07:32:37.190216: step: 284/77, loss: 0.02313528209924698 2023-01-22 07:32:38.473506: step: 
288/77, loss: 0.03175218030810356 2023-01-22 07:32:39.736135: step: 292/77, loss: 0.05135905742645264 2023-01-22 07:32:41.050358: step: 296/77, loss: 0.015323103405535221 2023-01-22 07:32:42.372513: step: 300/77, loss: 0.023951048031449318 2023-01-22 07:32:43.693479: step: 304/77, loss: 0.04787027835845947 2023-01-22 07:32:45.031934: step: 308/77, loss: 0.006300767883658409 2023-01-22 07:32:46.341373: step: 312/77, loss: 0.06664685904979706 2023-01-22 07:32:47.609593: step: 316/77, loss: 0.02057635597884655 2023-01-22 07:32:48.930354: step: 320/77, loss: 0.033022478222846985 2023-01-22 07:32:50.242807: step: 324/77, loss: 0.010527991689741611 2023-01-22 07:32:51.552265: step: 328/77, loss: 0.032395221292972565 2023-01-22 07:32:52.840819: step: 332/77, loss: 0.05708228796720505 2023-01-22 07:32:54.141558: step: 336/77, loss: 0.05209455266594887 2023-01-22 07:32:55.468601: step: 340/77, loss: 0.17037370800971985 2023-01-22 07:32:56.781075: step: 344/77, loss: 0.022650867700576782 2023-01-22 07:32:58.133126: step: 348/77, loss: 0.0066597433760762215 2023-01-22 07:32:59.440255: step: 352/77, loss: 0.009481480345129967 2023-01-22 07:33:00.743637: step: 356/77, loss: 0.024885304272174835 2023-01-22 07:33:02.053865: step: 360/77, loss: 0.004146650433540344 2023-01-22 07:33:03.406045: step: 364/77, loss: 0.04929664731025696 2023-01-22 07:33:04.725156: step: 368/77, loss: 0.024423057213425636 2023-01-22 07:33:06.040534: step: 372/77, loss: 0.024516455829143524 2023-01-22 07:33:07.360781: step: 376/77, loss: 0.02542997896671295 2023-01-22 07:33:08.653046: step: 380/77, loss: 0.02705446630716324 2023-01-22 07:33:09.853334: step: 384/77, loss: 0.009648672305047512 2023-01-22 07:33:11.176108: step: 388/77, loss: 0.021449856460094452 ================================================== Loss: 0.041 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Chinese: {'template': {'p': 0.9661016949152542, 'r': 0.44881889763779526, 'f1': 0.6129032258064516}, 'slot': {'p': 0.5925925925925926, 'r': 0.01384083044982699, 'f1': 0.02704987320371936}, 'combined': 0.016578954544215093, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Korean: {'template': {'p': 0.9672131147540983, 'r': 0.4645669291338583, 'f1': 0.6276595744680851}, 'slot': {'p': 0.6153846153846154, 'r': 0.01384083044982699, 'f1': 0.027072758037225045}, 'combined': 0.0169924757893221, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Russian: {'template': {'p': 0.9661016949152542, 'r': 0.44881889763779526, 'f1': 0.6129032258064516}, 'slot': {'p': 0.6296296296296297, 'r': 0.014705882352941176, 'f1': 0.028740490278951817}, 'combined': 0.017615139203228533, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 
0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 4 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:34:54.815999: step: 4/77, loss: 0.004307045601308346 2023-01-22 07:34:56.081848: step: 8/77, loss: 0.019689451903104782 2023-01-22 07:34:57.394128: step: 12/77, loss: 0.035611364990472794 2023-01-22 07:34:58.664166: step: 16/77, loss: 0.09661786258220673 2023-01-22 07:34:59.991940: step: 20/77, loss: 0.016645941883325577 2023-01-22 07:35:01.283665: step: 24/77, loss: 0.06498975306749344 2023-01-22 07:35:02.568946: step: 28/77, loss: 0.07612696290016174 2023-01-22 07:35:03.886149: step: 32/77, loss: 0.02551441453397274 2023-01-22 07:35:05.175881: step: 36/77, loss: 0.03179791569709778 2023-01-22 07:35:06.535314: step: 40/77, loss: 0.007174788508564234 2023-01-22 07:35:07.826752: step: 44/77, loss: 0.024703051894903183 2023-01-22 07:35:09.116149: step: 48/77, loss: 0.021204259246587753 2023-01-22 07:35:10.398238: step: 52/77, loss: 0.021670710295438766 2023-01-22 07:35:11.723385: step: 56/77, loss: 0.08658331632614136 2023-01-22 07:35:12.989879: step: 60/77, loss: 0.035944074392318726 2023-01-22 07:35:14.337967: step: 64/77, loss: 0.00285487063229084 2023-01-22 07:35:15.662646: step: 68/77, loss: 0.044412776827812195 2023-01-22 07:35:16.984714: 
step: 72/77, loss: 0.01924610510468483 2023-01-22 07:35:18.287679: step: 76/77, loss: 0.00307654682546854 2023-01-22 07:35:19.584562: step: 80/77, loss: 0.010064997710287571 2023-01-22 07:35:20.924218: step: 84/77, loss: 0.008352034725248814 2023-01-22 07:35:22.249888: step: 88/77, loss: 0.08968818187713623 2023-01-22 07:35:23.577118: step: 92/77, loss: 0.009886187501251698 2023-01-22 07:35:24.822819: step: 96/77, loss: 0.001971887657418847 2023-01-22 07:35:26.107269: step: 100/77, loss: 0.04601195082068443 2023-01-22 07:35:27.419380: step: 104/77, loss: 0.09782662242650986 2023-01-22 07:35:28.722471: step: 108/77, loss: 0.025908637791872025 2023-01-22 07:35:29.998299: step: 112/77, loss: 0.016505222767591476 2023-01-22 07:35:31.252571: step: 116/77, loss: 0.024203073233366013 2023-01-22 07:35:32.492436: step: 120/77, loss: 0.060282886028289795 2023-01-22 07:35:33.755742: step: 124/77, loss: 0.021418243646621704 2023-01-22 07:35:35.055940: step: 128/77, loss: 0.21600335836410522 2023-01-22 07:35:36.364981: step: 132/77, loss: 0.025364048779010773 2023-01-22 07:35:37.624822: step: 136/77, loss: 0.018149809911847115 2023-01-22 07:35:38.928792: step: 140/77, loss: 0.011929758824408054 2023-01-22 07:35:40.227063: step: 144/77, loss: 0.046187981963157654 2023-01-22 07:35:41.502807: step: 148/77, loss: 0.020687207579612732 2023-01-22 07:35:42.786783: step: 152/77, loss: 0.0076691824942827225 2023-01-22 07:35:43.997317: step: 156/77, loss: 0.01612572930753231 2023-01-22 07:35:45.302145: step: 160/77, loss: 0.005361623130738735 2023-01-22 07:35:46.576292: step: 164/77, loss: 0.013921466656029224 2023-01-22 07:35:47.870867: step: 168/77, loss: 0.014753730036318302 2023-01-22 07:35:49.151367: step: 172/77, loss: 0.006856784224510193 2023-01-22 07:35:50.472246: step: 176/77, loss: 0.006791251711547375 2023-01-22 07:35:51.750119: step: 180/77, loss: 0.023701781406998634 2023-01-22 07:35:53.047414: step: 184/77, loss: 0.003863999620079994 2023-01-22 07:35:54.336910: step: 188/77, loss: 0.08335664868354797 2023-01-22 07:35:55.675637: step: 192/77, loss: 0.0314469076693058 2023-01-22 07:35:56.958571: step: 196/77, loss: 0.028623215854167938 2023-01-22 07:35:58.278317: step: 200/77, loss: 0.016699109226465225 2023-01-22 07:35:59.619422: step: 204/77, loss: 0.20456035435199738 2023-01-22 07:36:00.951307: step: 208/77, loss: 0.02883007377386093 2023-01-22 07:36:02.259898: step: 212/77, loss: 0.019007431343197823 2023-01-22 07:36:03.553313: step: 216/77, loss: 0.012269491329789162 2023-01-22 07:36:04.873257: step: 220/77, loss: 0.018181482329964638 2023-01-22 07:36:06.145554: step: 224/77, loss: 0.006761718541383743 2023-01-22 07:36:07.442509: step: 228/77, loss: 0.09358687698841095 2023-01-22 07:36:08.741365: step: 232/77, loss: 0.015363413840532303 2023-01-22 07:36:10.040417: step: 236/77, loss: 0.12969405949115753 2023-01-22 07:36:11.311849: step: 240/77, loss: 0.013874266296625137 2023-01-22 07:36:12.610344: step: 244/77, loss: 0.01584259234368801 2023-01-22 07:36:13.877059: step: 248/77, loss: 0.014385403133928776 2023-01-22 07:36:15.202590: step: 252/77, loss: 0.0032219949644058943 2023-01-22 07:36:16.503951: step: 256/77, loss: 0.006406520493328571 2023-01-22 07:36:17.820026: step: 260/77, loss: 0.022642673924565315 2023-01-22 07:36:19.120171: step: 264/77, loss: 0.021770788356661797 2023-01-22 07:36:20.425098: step: 268/77, loss: 0.13582871854305267 2023-01-22 07:36:21.739036: step: 272/77, loss: 0.016680482774972916 2023-01-22 07:36:23.079566: step: 276/77, loss: 0.02853316254913807 2023-01-22 
07:36:24.350298: step: 280/77, loss: 0.01124812476336956 2023-01-22 07:36:25.672662: step: 284/77, loss: 0.038071952760219574 2023-01-22 07:36:26.929680: step: 288/77, loss: 0.016053292900323868 2023-01-22 07:36:28.246769: step: 292/77, loss: 0.02302248775959015 2023-01-22 07:36:29.574990: step: 296/77, loss: 0.02152237854897976 2023-01-22 07:36:30.864087: step: 300/77, loss: 0.004397551063448191 2023-01-22 07:36:32.157899: step: 304/77, loss: 0.05093690752983093 2023-01-22 07:36:33.446018: step: 308/77, loss: 0.02377844601869583 2023-01-22 07:36:34.787442: step: 312/77, loss: 0.06021858751773834 2023-01-22 07:36:36.081388: step: 316/77, loss: 0.26077935099601746 2023-01-22 07:36:37.423102: step: 320/77, loss: 0.012799538671970367 2023-01-22 07:36:38.755334: step: 324/77, loss: 0.013254077173769474 2023-01-22 07:36:40.039154: step: 328/77, loss: 0.0028235381469130516 2023-01-22 07:36:41.297691: step: 332/77, loss: 0.004842108115553856 2023-01-22 07:36:42.614575: step: 336/77, loss: 0.03993977606296539 2023-01-22 07:36:43.976608: step: 340/77, loss: 0.02504277601838112 2023-01-22 07:36:45.244757: step: 344/77, loss: 0.02159244380891323 2023-01-22 07:36:46.529076: step: 348/77, loss: 0.025638697668910027 2023-01-22 07:36:47.824995: step: 352/77, loss: 0.04387909173965454 2023-01-22 07:36:49.135151: step: 356/77, loss: 0.09395100176334381 2023-01-22 07:36:50.488586: step: 360/77, loss: 0.06930564343929291 2023-01-22 07:36:51.744758: step: 364/77, loss: 0.08788342028856277 2023-01-22 07:36:53.062867: step: 368/77, loss: 0.033105265349149704 2023-01-22 07:36:54.418402: step: 372/77, loss: 0.06653378158807755 2023-01-22 07:36:55.757912: step: 376/77, loss: 0.008012992329895496 2023-01-22 07:36:57.037284: step: 380/77, loss: 0.018076276406645775 2023-01-22 07:36:58.334470: step: 384/77, loss: 0.031726736575365067 2023-01-22 07:36:59.638940: step: 388/77, loss: 0.07097798585891724 ================================================== Loss: 0.037 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Chinese: {'template': {'p': 0.9821428571428571, 'r': 0.4330708661417323, 'f1': 0.6010928961748634}, 'slot': {'p': 0.6818181818181818, 'r': 0.012975778546712802, 'f1': 0.025466893039049233}, 'combined': 0.015307968493417572, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Korean: {'template': {'p': 0.9821428571428571, 'r': 0.4330708661417323, 'f1': 0.6010928961748634}, 'slot': {'p': 0.6818181818181818, 'r': 0.012975778546712802, 'f1': 0.025466893039049233}, 'combined': 0.015307968493417572, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Russian: {'template': {'p': 0.9821428571428571, 'r': 0.4330708661417323, 'f1': 0.6010928961748634}, 'slot': {'p': 0.6818181818181818, 'r': 0.012975778546712802, 'f1': 0.025466893039049233}, 'combined': 0.015307968493417572, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} Sample Korean: {'template': 
{'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 5 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:38:43.030836: step: 4/77, loss: 0.01851249672472477 2023-01-22 07:38:44.341430: step: 8/77, loss: 0.0035652555525302887 2023-01-22 07:38:45.637376: step: 12/77, loss: 0.007702311966568232 2023-01-22 07:38:46.898706: step: 16/77, loss: 0.01385408453643322 2023-01-22 07:38:48.198181: step: 20/77, loss: 0.026695922017097473 2023-01-22 07:38:49.462210: step: 24/77, loss: 0.012424022890627384 2023-01-22 07:38:50.787444: step: 28/77, loss: 0.007287586107850075 2023-01-22 07:38:52.094877: step: 32/77, loss: 0.037353646010160446 2023-01-22 07:38:53.380246: step: 36/77, loss: 0.018462033942341805 2023-01-22 07:38:54.690686: step: 40/77, loss: 0.016039032489061356 2023-01-22 07:38:55.977193: step: 44/77, loss: 0.06680099666118622 2023-01-22 07:38:57.277900: step: 48/77, loss: 0.006608352065086365 2023-01-22 07:38:58.594852: step: 52/77, loss: 0.10271197557449341 2023-01-22 07:38:59.886804: step: 56/77, loss: 0.017130937427282333 2023-01-22 07:39:01.225737: step: 60/77, loss: 0.01108469720929861 
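An aside on the step counter visible throughout these epochs: it advances in increments of four (step: 4/77, 8/77, ..., 388/77), which lines up with --accumulate_step 4 from the command line, i.e., gradients from four micro-batches of batch_size 10 are accumulated before each optimizer update, an effective batch of 40. (The /77 denominator never matches the 388 micro-batches actually printed per epoch, which looks like a quirk of the logging code rather than of training.) A generic sketch of the pattern, assuming standard gradient accumulation; the model, optimizer, and loader below are illustrative stand-ins, not train.py's objects:

```python
# Gradient-accumulation pattern consistent with the logged step counter
# (4, 8, 12, ..., 388) and with --batch_size 10 --accumulate_step 4:
# the optimizer steps once per four micro-batches, an effective batch of 40.
# Everything below is an illustrative stand-in, not train.py's actual code.
import torch

accumulate_step = 4
model = torch.nn.Linear(16, 2)  # placeholder model
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-4)
loader = [(torch.randn(10, 16), torch.randint(0, 2, (10,)))
          for _ in range(388)]

optimizer.zero_grad()
for i, (x, y) in enumerate(loader, start=1):
    loss = torch.nn.functional.cross_entropy(model(x), y)
    (loss / accumulate_step).backward()  # scale so accumulated grads average
    if i % accumulate_step == 0:
        optimizer.step()
        optimizer.zero_grad()
        # a log record like the ones above would be printed here, with i
        # as the micro-batch index
```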
2023-01-22 07:39:02.530342: step: 64/77, loss: 0.048407115042209625 2023-01-22 07:39:03.792859: step: 68/77, loss: 0.03218407183885574 2023-01-22 07:39:05.086315: step: 72/77, loss: 0.0029523978009819984 2023-01-22 07:39:06.331168: step: 76/77, loss: 0.03068627417087555 2023-01-22 07:39:07.664811: step: 80/77, loss: 0.002274099737405777 2023-01-22 07:39:08.962826: step: 84/77, loss: 0.01065951120108366 2023-01-22 07:39:10.244493: step: 88/77, loss: 0.0039505185559391975 2023-01-22 07:39:11.506335: step: 92/77, loss: 0.03132602944970131 2023-01-22 07:39:12.815390: step: 96/77, loss: 0.047224149107933044 2023-01-22 07:39:14.154666: step: 100/77, loss: 0.037667229771614075 2023-01-22 07:39:15.443445: step: 104/77, loss: 0.030513830482959747 2023-01-22 07:39:16.772388: step: 108/77, loss: 0.02144569344818592 2023-01-22 07:39:18.039853: step: 112/77, loss: 0.008346985094249249 2023-01-22 07:39:19.366394: step: 116/77, loss: 0.03922979533672333 2023-01-22 07:39:20.714783: step: 120/77, loss: 0.0027566482312977314 2023-01-22 07:39:21.967447: step: 124/77, loss: 0.004905262961983681 2023-01-22 07:39:23.266728: step: 128/77, loss: 0.007458773907274008 2023-01-22 07:39:24.520996: step: 132/77, loss: 0.026997998356819153 2023-01-22 07:39:25.787844: step: 136/77, loss: 0.021584073081612587 2023-01-22 07:39:27.088310: step: 140/77, loss: 0.013400858268141747 2023-01-22 07:39:28.392260: step: 144/77, loss: 0.021433046087622643 2023-01-22 07:39:29.681824: step: 148/77, loss: 0.025518041104078293 2023-01-22 07:39:30.978802: step: 152/77, loss: 0.050987645983695984 2023-01-22 07:39:32.257411: step: 156/77, loss: 0.010998288169503212 2023-01-22 07:39:33.543917: step: 160/77, loss: 0.046618834137916565 2023-01-22 07:39:34.877596: step: 164/77, loss: 0.06384487450122833 2023-01-22 07:39:36.158577: step: 168/77, loss: 0.016236823052167892 2023-01-22 07:39:37.469053: step: 172/77, loss: 0.031272441148757935 2023-01-22 07:39:38.779140: step: 176/77, loss: 0.036344870924949646 2023-01-22 07:39:40.104505: step: 180/77, loss: 0.02237529307603836 2023-01-22 07:39:41.434026: step: 184/77, loss: 0.034511446952819824 2023-01-22 07:39:42.704470: step: 188/77, loss: 0.038873665034770966 2023-01-22 07:39:43.983572: step: 192/77, loss: 0.03066616877913475 2023-01-22 07:39:45.281432: step: 196/77, loss: 0.0007045451202429831 2023-01-22 07:39:46.609308: step: 200/77, loss: 0.06683935970067978 2023-01-22 07:39:47.905804: step: 204/77, loss: 0.030061164870858192 2023-01-22 07:39:49.186881: step: 208/77, loss: 0.05396779626607895 2023-01-22 07:39:50.490062: step: 212/77, loss: 0.012820908799767494 2023-01-22 07:39:51.793578: step: 216/77, loss: 0.009320125915110111 2023-01-22 07:39:53.127249: step: 220/77, loss: 0.0747046247124672 2023-01-22 07:39:54.408144: step: 224/77, loss: 0.009744771756231785 2023-01-22 07:39:55.706414: step: 228/77, loss: 0.004006761126220226 2023-01-22 07:39:56.993496: step: 232/77, loss: 0.013168903067708015 2023-01-22 07:39:58.273178: step: 236/77, loss: 0.07993464171886444 2023-01-22 07:39:59.566641: step: 240/77, loss: 0.03175496682524681 2023-01-22 07:40:00.872502: step: 244/77, loss: 0.023099705576896667 2023-01-22 07:40:02.185524: step: 248/77, loss: 0.0038555117789655924 2023-01-22 07:40:03.458407: step: 252/77, loss: 0.04137454554438591 2023-01-22 07:40:04.758898: step: 256/77, loss: 0.002440269570797682 2023-01-22 07:40:06.073980: step: 260/77, loss: 0.007243657484650612 2023-01-22 07:40:07.368328: step: 264/77, loss: 0.03407406806945801 2023-01-22 07:40:08.674363: step: 268/77, loss: 
0.04122215881943703 2023-01-22 07:40:09.992267: step: 272/77, loss: 0.037244781851768494 2023-01-22 07:40:11.255550: step: 276/77, loss: 0.04572469741106033 2023-01-22 07:40:12.541870: step: 280/77, loss: 0.022644642740488052 2023-01-22 07:40:13.883722: step: 284/77, loss: 0.0011615646071732044 2023-01-22 07:40:15.174227: step: 288/77, loss: 0.014798080548644066 2023-01-22 07:40:16.454201: step: 292/77, loss: 0.023985128849744797 2023-01-22 07:40:17.766940: step: 296/77, loss: 0.02150266245007515 2023-01-22 07:40:19.092898: step: 300/77, loss: 0.007881814613938332 2023-01-22 07:40:20.407587: step: 304/77, loss: 0.031038884073495865 2023-01-22 07:40:21.688103: step: 308/77, loss: 0.004278877750039101 2023-01-22 07:40:22.971578: step: 312/77, loss: 0.007023588288575411 2023-01-22 07:40:24.246862: step: 316/77, loss: 0.05231161415576935 2023-01-22 07:40:25.574434: step: 320/77, loss: 0.06902115792036057 2023-01-22 07:40:26.887614: step: 324/77, loss: 0.012647450901567936 2023-01-22 07:40:28.165607: step: 328/77, loss: 0.07448658347129822 2023-01-22 07:40:29.514055: step: 332/77, loss: 0.018296927213668823 2023-01-22 07:40:30.815303: step: 336/77, loss: 0.04754916578531265 2023-01-22 07:40:32.115802: step: 340/77, loss: 0.23820245265960693 2023-01-22 07:40:33.399897: step: 344/77, loss: 0.02240445651113987 2023-01-22 07:40:34.660332: step: 348/77, loss: 0.047524698078632355 2023-01-22 07:40:35.948560: step: 352/77, loss: 0.04264065623283386 2023-01-22 07:40:37.223284: step: 356/77, loss: 0.07267670333385468 2023-01-22 07:40:38.571083: step: 360/77, loss: 0.02015545964241028 2023-01-22 07:40:39.840425: step: 364/77, loss: 0.04709238559007645 2023-01-22 07:40:41.181004: step: 368/77, loss: 0.09137171506881714 2023-01-22 07:40:42.461146: step: 372/77, loss: 0.05773291736841202 2023-01-22 07:40:43.766111: step: 376/77, loss: 0.02109794318675995 2023-01-22 07:40:45.099306: step: 380/77, loss: 0.02238389477133751 2023-01-22 07:40:46.433660: step: 384/77, loss: 0.04301886260509491 2023-01-22 07:40:47.791388: step: 388/77, loss: 0.01773441582918167 ================================================== Loss: 0.031 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Chinese: {'template': {'p': 0.9466666666666667, 'r': 0.5590551181102362, 'f1': 0.7029702970297029}, 'slot': {'p': 0.7307692307692307, 'r': 0.01643598615916955, 'f1': 0.032148900169204735}, 'combined': 0.02259972190112412, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Korean: {'template': {'p': 0.9594594594594594, 'r': 0.5590551181102362, 'f1': 0.7064676616915422}, 'slot': {'p': 0.7307692307692307, 'r': 0.01643598615916955, 'f1': 0.032148900169204735}, 'combined': 0.022712158328492892, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Russian: {'template': {'p': 0.9342105263157895, 'r': 0.5590551181102362, 'f1': 0.6995073891625616}, 'slot': {'p': 0.7407407407407407, 'r': 0.01730103806228374, 'f1': 0.03381234150464919}, 'combined': 0.023651982727390077, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 
0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 6 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:42:31.020496: step: 4/77, loss: 0.019876815378665924 2023-01-22 07:42:32.295823: step: 8/77, loss: 0.006573471240699291 2023-01-22 07:42:33.578291: step: 12/77, loss: 0.015288086608052254 2023-01-22 07:42:34.836625: step: 16/77, loss: 0.03685459867119789 2023-01-22 07:42:36.115479: step: 20/77, loss: 0.025068249553442 2023-01-22 07:42:37.439649: step: 24/77, loss: 0.004917709622532129 2023-01-22 07:42:38.726624: step: 28/77, loss: 0.041336797177791595 2023-01-22 07:42:40.005314: step: 32/77, loss: 0.015547524206340313 2023-01-22 07:42:41.314944: step: 36/77, loss: 0.022921713069081306 2023-01-22 07:42:42.624301: step: 40/77, loss: 0.0036199199967086315 2023-01-22 07:42:43.935484: step: 44/77, loss: 0.0027790761087089777 2023-01-22 07:42:45.218976: step: 48/77, loss: 0.011076805181801319 2023-01-22 07:42:46.492335: step: 52/77, 
loss: 0.00021039244893472642 2023-01-22 07:42:47.847000: step: 56/77, loss: 0.002944371895864606 2023-01-22 07:42:49.164208: step: 60/77, loss: 0.025461489334702492 2023-01-22 07:42:50.482401: step: 64/77, loss: 0.04321763664484024 2023-01-22 07:42:51.789700: step: 68/77, loss: 0.005923053249716759 2023-01-22 07:42:53.118617: step: 72/77, loss: 0.0004399519821163267 2023-01-22 07:42:54.447476: step: 76/77, loss: 0.010212014429271221 2023-01-22 07:42:55.774488: step: 80/77, loss: 0.0259318295866251 2023-01-22 07:42:57.039977: step: 84/77, loss: 0.022111227735877037 2023-01-22 07:42:58.330725: step: 88/77, loss: 0.023291099816560745 2023-01-22 07:42:59.649269: step: 92/77, loss: 0.018929187208414078 2023-01-22 07:43:00.937397: step: 96/77, loss: 0.014732517302036285 2023-01-22 07:43:02.188571: step: 100/77, loss: 0.02497703768312931 2023-01-22 07:43:03.505713: step: 104/77, loss: 0.068994902074337 2023-01-22 07:43:04.775489: step: 108/77, loss: 0.02583613246679306 2023-01-22 07:43:06.078241: step: 112/77, loss: 0.05988915264606476 2023-01-22 07:43:07.351201: step: 116/77, loss: 0.060604531317949295 2023-01-22 07:43:08.641426: step: 120/77, loss: 0.020236942917108536 2023-01-22 07:43:09.983240: step: 124/77, loss: 0.049723561853170395 2023-01-22 07:43:11.262087: step: 128/77, loss: 0.008909706957638264 2023-01-22 07:43:12.541028: step: 132/77, loss: 0.0024141171015799046 2023-01-22 07:43:13.801058: step: 136/77, loss: 0.02390531823039055 2023-01-22 07:43:15.076025: step: 140/77, loss: 0.057607464492321014 2023-01-22 07:43:16.393231: step: 144/77, loss: 0.00355730508454144 2023-01-22 07:43:17.682924: step: 148/77, loss: 0.010880752466619015 2023-01-22 07:43:19.003630: step: 152/77, loss: 0.016689520329236984 2023-01-22 07:43:20.330417: step: 156/77, loss: 0.0038037316408008337 2023-01-22 07:43:21.644344: step: 160/77, loss: 0.017490610480308533 2023-01-22 07:43:22.921062: step: 164/77, loss: 0.0006976961740292609 2023-01-22 07:43:24.199021: step: 168/77, loss: 0.008576530031859875 2023-01-22 07:43:25.517502: step: 172/77, loss: 0.054237280040979385 2023-01-22 07:43:26.829941: step: 176/77, loss: 0.025560835376381874 2023-01-22 07:43:28.142483: step: 180/77, loss: 0.02417994663119316 2023-01-22 07:43:29.476618: step: 184/77, loss: 0.05693662539124489 2023-01-22 07:43:30.804186: step: 188/77, loss: 0.0032692819368094206 2023-01-22 07:43:32.092883: step: 192/77, loss: 0.008004766888916492 2023-01-22 07:43:33.432679: step: 196/77, loss: 0.06952568143606186 2023-01-22 07:43:34.753571: step: 200/77, loss: 0.019822662696242332 2023-01-22 07:43:36.050843: step: 204/77, loss: 0.05271018296480179 2023-01-22 07:43:37.344619: step: 208/77, loss: 0.012663454748690128 2023-01-22 07:43:38.666548: step: 212/77, loss: 0.007480953354388475 2023-01-22 07:43:39.949843: step: 216/77, loss: 0.07426632195711136 2023-01-22 07:43:41.234383: step: 220/77, loss: 0.021370479837059975 2023-01-22 07:43:42.522220: step: 224/77, loss: 0.003257451578974724 2023-01-22 07:43:43.811775: step: 228/77, loss: 0.010057792067527771 2023-01-22 07:43:45.133205: step: 232/77, loss: 0.06914263963699341 2023-01-22 07:43:46.448207: step: 236/77, loss: 0.0067864772863686085 2023-01-22 07:43:47.782502: step: 240/77, loss: 0.05100144073367119 2023-01-22 07:43:49.076965: step: 244/77, loss: 0.04917469248175621 2023-01-22 07:43:50.421329: step: 248/77, loss: 0.03630446270108223 2023-01-22 07:43:51.750814: step: 252/77, loss: 0.0772506445646286 2023-01-22 07:43:53.022315: step: 256/77, loss: 0.014203687198460102 2023-01-22 07:43:54.313736: step: 
260/77, loss: 0.053320031613111496 2023-01-22 07:43:55.661457: step: 264/77, loss: 0.015590444207191467 2023-01-22 07:43:57.026967: step: 268/77, loss: 0.01844686083495617 2023-01-22 07:43:58.348142: step: 272/77, loss: 0.04268677532672882 2023-01-22 07:43:59.667901: step: 276/77, loss: 0.023348961025476456 2023-01-22 07:44:00.989258: step: 280/77, loss: 0.013826183043420315 2023-01-22 07:44:02.243546: step: 284/77, loss: 0.014793286100029945 2023-01-22 07:44:03.549719: step: 288/77, loss: 0.009313910268247128 2023-01-22 07:44:04.870083: step: 292/77, loss: 0.023486994206905365 2023-01-22 07:44:06.144553: step: 296/77, loss: 0.022837920114398003 2023-01-22 07:44:07.440734: step: 300/77, loss: 0.006142630707472563 2023-01-22 07:44:08.746569: step: 304/77, loss: 0.014211054891347885 2023-01-22 07:44:10.048434: step: 308/77, loss: 0.03037683293223381 2023-01-22 07:44:11.340182: step: 312/77, loss: 0.014047280885279179 2023-01-22 07:44:12.706990: step: 316/77, loss: 0.01892756298184395 2023-01-22 07:44:14.020066: step: 320/77, loss: 0.009444999508559704 2023-01-22 07:44:15.353065: step: 324/77, loss: 0.002001096960157156 2023-01-22 07:44:16.639882: step: 328/77, loss: 0.03141818940639496 2023-01-22 07:44:17.917810: step: 332/77, loss: 0.006161955185234547 2023-01-22 07:44:19.171688: step: 336/77, loss: 0.004971682094037533 2023-01-22 07:44:20.471386: step: 340/77, loss: 0.00327905360609293 2023-01-22 07:44:21.796679: step: 344/77, loss: 0.02420627512037754 2023-01-22 07:44:23.127142: step: 348/77, loss: 0.0510546937584877 2023-01-22 07:44:24.475624: step: 352/77, loss: 0.0859951302409172 2023-01-22 07:44:25.862175: step: 356/77, loss: 0.004114494659006596 2023-01-22 07:44:27.184584: step: 360/77, loss: 0.05849332734942436 2023-01-22 07:44:28.488774: step: 364/77, loss: 0.012778722681105137 2023-01-22 07:44:29.799696: step: 368/77, loss: 0.0038407668471336365 2023-01-22 07:44:31.113634: step: 372/77, loss: 0.0127643421292305 2023-01-22 07:44:32.440207: step: 376/77, loss: 0.013638246804475784 2023-01-22 07:44:33.745551: step: 380/77, loss: 0.03323771804571152 2023-01-22 07:44:35.075839: step: 384/77, loss: 0.0005775393219664693 2023-01-22 07:44:36.352425: step: 388/77, loss: 0.00024180466425605118 ================================================== Loss: 0.024 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 6} Test Chinese: {'template': {'p': 0.9594594594594594, 'r': 0.5590551181102362, 'f1': 0.7064676616915422}, 'slot': {'p': 0.6363636363636364, 'r': 0.018166089965397925, 'f1': 0.03532380151387721}, 'combined': 0.02495512345756499, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 6} Test Korean: {'template': {'p': 0.96, 'r': 0.5669291338582677, 'f1': 0.712871287128713}, 'slot': {'p': 0.6285714285714286, 'r': 0.01903114186851211, 'f1': 0.036943744752308987}, 'combined': 0.026336134872933143, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 6} Test Russian: {'template': {'p': 0.9466666666666667, 'r': 0.5590551181102362, 'f1': 0.7029702970297029}, 'slot': {'p': 0.6176470588235294, 'r': 
0.018166089965397925, 'f1': 0.03529411764705882}, 'combined': 0.024810716365754222, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 7 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:46:19.943439: step: 4/77, loss: 0.014091260731220245 2023-01-22 07:46:21.218170: step: 8/77, loss: 0.000623593048658222 2023-01-22 07:46:22.539314: step: 12/77, loss: 0.0031894927378743887 2023-01-22 07:46:23.832796: step: 16/77, loss: 0.0008812348823994398 2023-01-22 07:46:25.146351: step: 20/77, loss: 0.014165188185870647 2023-01-22 07:46:26.452454: step: 24/77, loss: 0.01300637237727642 2023-01-22 07:46:27.760625: step: 28/77, loss: 0.003656288841739297 2023-01-22 07:46:29.039563: step: 32/77, loss: 0.004824704956263304 2023-01-22 07:46:30.297436: step: 36/77, loss: 0.0013270446797832847 2023-01-22 07:46:31.594417: step: 40/77, loss: 0.006385405547916889 2023-01-22 07:46:32.846609: step: 44/77, loss: 0.005329822190105915 2023-01-22 
07:46:34.142416: step: 48/77, loss: 0.001831568544730544 2023-01-22 07:46:35.444027: step: 52/77, loss: 0.038037534803152084 2023-01-22 07:46:36.700330: step: 56/77, loss: 0.009928743354976177 2023-01-22 07:46:37.992845: step: 60/77, loss: 0.01838652975857258 2023-01-22 07:46:39.245770: step: 64/77, loss: 0.02379920333623886 2023-01-22 07:46:40.550861: step: 68/77, loss: 0.035091448575258255 2023-01-22 07:46:41.885678: step: 72/77, loss: 0.004036274738609791 2023-01-22 07:46:43.216029: step: 76/77, loss: 0.008674428798258305 2023-01-22 07:46:44.447484: step: 80/77, loss: 0.001725957146845758 2023-01-22 07:46:45.727169: step: 84/77, loss: 0.0015108429361134768 2023-01-22 07:46:47.048266: step: 88/77, loss: 0.012778017669916153 2023-01-22 07:46:48.360609: step: 92/77, loss: 0.02824033610522747 2023-01-22 07:46:49.629453: step: 96/77, loss: 0.000985867576673627 2023-01-22 07:46:50.924474: step: 100/77, loss: 0.028359783813357353 2023-01-22 07:46:52.231451: step: 104/77, loss: 0.004324179142713547 2023-01-22 07:46:53.585800: step: 108/77, loss: 0.27105656266212463 2023-01-22 07:46:54.868695: step: 112/77, loss: 0.019737599417567253 2023-01-22 07:46:56.168313: step: 116/77, loss: 0.009240414947271347 2023-01-22 07:46:57.519471: step: 120/77, loss: 0.004861234221607447 2023-01-22 07:46:58.826888: step: 124/77, loss: 0.03176651522517204 2023-01-22 07:47:00.168779: step: 128/77, loss: 0.0288337804377079 2023-01-22 07:47:01.464895: step: 132/77, loss: 0.007038864307105541 2023-01-22 07:47:02.753258: step: 136/77, loss: 0.012824473902583122 2023-01-22 07:47:04.019873: step: 140/77, loss: 0.006008431315422058 2023-01-22 07:47:05.335120: step: 144/77, loss: 0.057693202048540115 2023-01-22 07:47:06.611112: step: 148/77, loss: 0.025265760719776154 2023-01-22 07:47:07.900864: step: 152/77, loss: 0.006553241517394781 2023-01-22 07:47:09.203432: step: 156/77, loss: 0.004011549986898899 2023-01-22 07:47:10.513610: step: 160/77, loss: 0.009259817190468311 2023-01-22 07:47:11.796209: step: 164/77, loss: 0.005273655988276005 2023-01-22 07:47:13.072572: step: 168/77, loss: 0.06386923044919968 2023-01-22 07:47:14.315217: step: 172/77, loss: 0.021036511287093163 2023-01-22 07:47:15.609214: step: 176/77, loss: 0.01947185769677162 2023-01-22 07:47:16.926210: step: 180/77, loss: 0.0021108086220920086 2023-01-22 07:47:18.198368: step: 184/77, loss: 0.0008717880118638277 2023-01-22 07:47:19.499757: step: 188/77, loss: 0.003025084501132369 2023-01-22 07:47:20.813262: step: 192/77, loss: 0.021636178717017174 2023-01-22 07:47:22.105116: step: 196/77, loss: 0.018800625577569008 2023-01-22 07:47:23.360987: step: 200/77, loss: 0.017199119552969933 2023-01-22 07:47:24.675077: step: 204/77, loss: 0.04579906910657883 2023-01-22 07:47:25.957978: step: 208/77, loss: 0.04112239554524422 2023-01-22 07:47:27.229137: step: 212/77, loss: 0.017853224650025368 2023-01-22 07:47:28.502479: step: 216/77, loss: 0.01161262672394514 2023-01-22 07:47:29.824154: step: 220/77, loss: 0.01685524359345436 2023-01-22 07:47:31.123029: step: 224/77, loss: 0.0001876596943475306 2023-01-22 07:47:32.406232: step: 228/77, loss: 0.007752103731036186 2023-01-22 07:47:33.661020: step: 232/77, loss: 0.012243285775184631 2023-01-22 07:47:35.022874: step: 236/77, loss: 0.06392502039670944 2023-01-22 07:47:36.358011: step: 240/77, loss: 0.03169390186667442 2023-01-22 07:47:37.653344: step: 244/77, loss: 0.05966990441083908 2023-01-22 07:47:38.984921: step: 248/77, loss: 0.0021885111927986145 2023-01-22 07:47:40.283389: step: 252/77, loss: 0.16490457952022552 
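Epoch 7's evaluation follows below. Before it arrives, note why the 'Current best result' blocks have stayed frozen at epoch 2: the dev 'combined' scores of epochs 3, 4, 5, and 7 tie epoch 2's 0.05179909351586346 exactly, epoch 6 dips to 0.048482119404105226, and no 'New best ... model' message has fired since epoch 2. That is consistent with a strictly-greater comparison on the per-language dev combined score, so exact ties keep the earlier epoch. A sketch of that apparent rule; the function and dict names are hypothetical, inferred from the log rather than taken from train.py:

```python
# Apparent best-checkpoint rule, inferred from the log (not read from
# train.py): the stored best for a language is replaced only when the new
# dev 'combined' score is strictly greater, so exact ties (epochs 3-5, 7)
# keep the earlier epoch-2 result.
best = {}  # language -> best dev eval dict seen so far

def maybe_update_best(language, dev_result):
    prev = best.get(language)
    if prev is None or dev_result['combined'] > prev['combined']:
        best[language] = dev_result
        print(f'New best {language} model...')  # message format as in the log
        return True
    return False

maybe_update_best('chinese', {'combined': 0.05179909351586346, 'epoch': 2})  # fires
maybe_update_best('chinese', {'combined': 0.05179909351586346, 'epoch': 3})  # tie -> keeps epoch 2
```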
2023-01-22 07:47:41.569468: step: 256/77, loss: 0.027331221848726273 2023-01-22 07:47:42.866749: step: 260/77, loss: 0.019304146990180016 2023-01-22 07:47:44.143245: step: 264/77, loss: 0.0014638921711593866 2023-01-22 07:47:45.443751: step: 268/77, loss: 0.04890373349189758 2023-01-22 07:47:46.728651: step: 272/77, loss: 0.0009773285128176212 2023-01-22 07:47:48.030881: step: 276/77, loss: 0.0015155820874497294 2023-01-22 07:47:49.368707: step: 280/77, loss: 0.00379112153314054 2023-01-22 07:47:50.633399: step: 284/77, loss: 0.03754507750272751 2023-01-22 07:47:51.900320: step: 288/77, loss: 0.021101878955960274 2023-01-22 07:47:53.205942: step: 292/77, loss: 0.019474411383271217 2023-01-22 07:47:54.525698: step: 296/77, loss: 0.018521897494792938 2023-01-22 07:47:55.813625: step: 300/77, loss: 0.001744049252010882 2023-01-22 07:47:57.084385: step: 304/77, loss: 0.006619489751756191 2023-01-22 07:47:58.375082: step: 308/77, loss: 0.03244337439537048 2023-01-22 07:47:59.680175: step: 312/77, loss: 0.0007960422663018107 2023-01-22 07:48:00.999214: step: 316/77, loss: 0.010220680385828018 2023-01-22 07:48:02.334248: step: 320/77, loss: 0.01762489601969719 2023-01-22 07:48:03.647713: step: 324/77, loss: 0.14098475873470306 2023-01-22 07:48:04.966764: step: 328/77, loss: 0.00601299898698926 2023-01-22 07:48:06.279390: step: 332/77, loss: 0.012527402490377426 2023-01-22 07:48:07.600784: step: 336/77, loss: 0.000528876087628305 2023-01-22 07:48:08.899231: step: 340/77, loss: 0.0026341930497437716 2023-01-22 07:48:10.187906: step: 344/77, loss: 0.05650842562317848 2023-01-22 07:48:11.462601: step: 348/77, loss: 0.014022696763277054 2023-01-22 07:48:12.775997: step: 352/77, loss: 0.0021680789068341255 2023-01-22 07:48:14.078399: step: 356/77, loss: 8.263064228231087e-05 2023-01-22 07:48:15.347058: step: 360/77, loss: 0.020330294966697693 2023-01-22 07:48:16.682094: step: 364/77, loss: 0.04933106154203415 2023-01-22 07:48:17.981501: step: 368/77, loss: 0.001341789378784597 2023-01-22 07:48:19.237421: step: 372/77, loss: 0.002500483300536871 2023-01-22 07:48:20.539387: step: 376/77, loss: 0.041689809411764145 2023-01-22 07:48:21.864584: step: 380/77, loss: 0.023196659982204437 2023-01-22 07:48:23.135966: step: 384/77, loss: 0.1326698660850525 2023-01-22 07:48:24.468020: step: 388/77, loss: 0.04675935208797455 ================================================== Loss: 0.023 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Chinese: {'template': {'p': 0.9466666666666667, 'r': 0.5590551181102362, 'f1': 0.7029702970297029}, 'slot': {'p': 0.6470588235294118, 'r': 0.01903114186851211, 'f1': 0.03697478991596639}, 'combined': 0.025992179049837756, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Korean: {'template': {'p': 0.9473684210526315, 'r': 0.5669291338582677, 'f1': 0.70935960591133}, 'slot': {'p': 0.6388888888888888, 'r': 0.019896193771626297, 'f1': 0.03859060402684564}, 'combined': 0.027374615664363406, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Russian: {'template': {'p': 
0.9466666666666667, 'r': 0.5590551181102362, 'f1': 0.7029702970297029}, 'slot': {'p': 0.6470588235294118, 'r': 0.01903114186851211, 'f1': 0.03697478991596639}, 'combined': 0.025992179049837756, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 8 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:50:07.913137: step: 4/77, loss: 0.03909171372652054 2023-01-22 07:50:09.216021: step: 8/77, loss: 0.04083281010389328 2023-01-22 07:50:10.499009: step: 12/77, loss: 0.02047703228890896 2023-01-22 07:50:11.794674: step: 16/77, loss: 0.028162088245153427 2023-01-22 07:50:13.128347: step: 20/77, loss: 0.019167069345712662 2023-01-22 07:50:14.398232: step: 24/77, loss: 0.030149690806865692 2023-01-22 07:50:15.699131: step: 28/77, loss: 0.04303986579179764 2023-01-22 07:50:17.023985: step: 32/77, loss: 0.0117753054946661 2023-01-22 07:50:18.332300: step: 36/77, loss: 0.005166348069906235 2023-01-22 07:50:19.625342: step: 40/77, loss: 
0.0857258141040802 2023-01-22 07:50:20.907077: step: 44/77, loss: 0.00024584803031757474 2023-01-22 07:50:22.251258: step: 48/77, loss: 0.011664533987641335 2023-01-22 07:50:23.488884: step: 52/77, loss: 0.034976307302713394 2023-01-22 07:50:24.774329: step: 56/77, loss: 0.006636979524046183 2023-01-22 07:50:26.041613: step: 60/77, loss: 0.05367900803685188 2023-01-22 07:50:27.338315: step: 64/77, loss: 0.048779040575027466 2023-01-22 07:50:28.661713: step: 68/77, loss: 0.0183818731456995 2023-01-22 07:50:29.975911: step: 72/77, loss: 0.010400182567536831 2023-01-22 07:50:31.246454: step: 76/77, loss: 0.05637083947658539 2023-01-22 07:50:32.497596: step: 80/77, loss: 0.0005344225792214274 2023-01-22 07:50:33.799564: step: 84/77, loss: 0.00033000862458720803 2023-01-22 07:50:35.079424: step: 88/77, loss: 0.018841277807950974 2023-01-22 07:50:36.370035: step: 92/77, loss: 0.023982657119631767 2023-01-22 07:50:37.630054: step: 96/77, loss: 0.005666761659085751 2023-01-22 07:50:38.899182: step: 100/77, loss: 0.05443336069583893 2023-01-22 07:50:40.218782: step: 104/77, loss: 0.0061767855659127235 2023-01-22 07:50:41.521507: step: 108/77, loss: 0.008791591040790081 2023-01-22 07:50:42.800141: step: 112/77, loss: 0.01809808798134327 2023-01-22 07:50:44.079955: step: 116/77, loss: 0.03698926791548729 2023-01-22 07:50:45.350631: step: 120/77, loss: 0.02199491485953331 2023-01-22 07:50:46.587435: step: 124/77, loss: 0.002461447613313794 2023-01-22 07:50:47.852915: step: 128/77, loss: 0.013487190008163452 2023-01-22 07:50:49.078724: step: 132/77, loss: 0.028719313442707062 2023-01-22 07:50:50.409991: step: 136/77, loss: 0.07887116074562073 2023-01-22 07:50:51.724547: step: 140/77, loss: 0.06149774789810181 2023-01-22 07:50:53.005015: step: 144/77, loss: 0.005725775845348835 2023-01-22 07:50:54.290828: step: 148/77, loss: 0.007735730614513159 2023-01-22 07:50:55.590663: step: 152/77, loss: 0.013402965851128101 2023-01-22 07:50:56.927307: step: 156/77, loss: 0.0026370673440396786 2023-01-22 07:50:58.206988: step: 160/77, loss: 0.0006408776971511543 2023-01-22 07:50:59.510671: step: 164/77, loss: 0.015061311423778534 2023-01-22 07:51:00.819710: step: 168/77, loss: 0.009904321283102036 2023-01-22 07:51:02.140554: step: 172/77, loss: 0.013677509501576424 2023-01-22 07:51:03.469954: step: 176/77, loss: 0.06900405138731003 2023-01-22 07:51:04.767245: step: 180/77, loss: 0.018023595213890076 2023-01-22 07:51:06.066111: step: 184/77, loss: 0.031008142977952957 2023-01-22 07:51:07.356369: step: 188/77, loss: 0.03302126005291939 2023-01-22 07:51:08.634407: step: 192/77, loss: 0.007039917167276144 2023-01-22 07:51:09.937939: step: 196/77, loss: 0.002231169492006302 2023-01-22 07:51:11.239590: step: 200/77, loss: 0.008826125413179398 2023-01-22 07:51:12.571870: step: 204/77, loss: 0.016067208722233772 2023-01-22 07:51:13.876671: step: 208/77, loss: 0.0906267911195755 2023-01-22 07:51:15.156921: step: 212/77, loss: 0.003105991752818227 2023-01-22 07:51:16.481969: step: 216/77, loss: 0.03075207956135273 2023-01-22 07:51:17.819784: step: 220/77, loss: 0.04361942410469055 2023-01-22 07:51:19.110813: step: 224/77, loss: 0.010997829958796501 2023-01-22 07:51:20.430772: step: 228/77, loss: 0.01769120618700981 2023-01-22 07:51:21.748083: step: 232/77, loss: 0.008495952002704144 2023-01-22 07:51:23.044194: step: 236/77, loss: 0.0071330564096570015 2023-01-22 07:51:24.334673: step: 240/77, loss: 0.00010605436546029523 2023-01-22 07:51:25.647317: step: 244/77, loss: 0.016715597361326218 2023-01-22 07:51:26.938071: step: 
248/77, loss: 0.007385910488665104 2023-01-22 07:51:28.200482: step: 252/77, loss: 0.01896490901708603 2023-01-22 07:51:29.473377: step: 256/77, loss: 0.039543408900499344 2023-01-22 07:51:30.779118: step: 260/77, loss: 0.014666267670691013 2023-01-22 07:51:32.108938: step: 264/77, loss: 0.012767759151756763 2023-01-22 07:51:33.428617: step: 268/77, loss: 0.011936817318201065 2023-01-22 07:51:34.702542: step: 272/77, loss: 0.005859396420419216 2023-01-22 07:51:35.970030: step: 276/77, loss: 0.0017303384374827147 2023-01-22 07:51:37.247471: step: 280/77, loss: 0.05166729539632797 2023-01-22 07:51:38.557579: step: 284/77, loss: 0.028724398463964462 2023-01-22 07:51:39.857474: step: 288/77, loss: 0.027749208733439445 2023-01-22 07:51:41.125579: step: 292/77, loss: 0.008368278853595257 2023-01-22 07:51:42.431042: step: 296/77, loss: 0.013057144358754158 2023-01-22 07:51:43.789354: step: 300/77, loss: 0.011596021242439747 2023-01-22 07:51:45.114164: step: 304/77, loss: 0.0006313698249869049 2023-01-22 07:51:46.401183: step: 308/77, loss: 0.052643369883298874 2023-01-22 07:51:47.667702: step: 312/77, loss: 0.010507049039006233 2023-01-22 07:51:48.974932: step: 316/77, loss: 0.01833794265985489 2023-01-22 07:51:50.283858: step: 320/77, loss: 0.007980125956237316 2023-01-22 07:51:51.573700: step: 324/77, loss: 0.006608211435377598 2023-01-22 07:51:52.819024: step: 328/77, loss: 0.02376813068985939 2023-01-22 07:51:54.138972: step: 332/77, loss: 6.894932448631153e-05 2023-01-22 07:51:55.403745: step: 336/77, loss: 0.011127547360956669 2023-01-22 07:51:56.736309: step: 340/77, loss: 0.009277567267417908 2023-01-22 07:51:58.033616: step: 344/77, loss: 0.04166052117943764 2023-01-22 07:51:59.372944: step: 348/77, loss: 0.046544525772333145 2023-01-22 07:52:00.653176: step: 352/77, loss: 0.0034268973395228386 2023-01-22 07:52:01.949589: step: 356/77, loss: 0.030889064073562622 2023-01-22 07:52:03.272647: step: 360/77, loss: 0.047395166009664536 2023-01-22 07:52:04.569934: step: 364/77, loss: 0.009107564575970173 2023-01-22 07:52:05.901815: step: 368/77, loss: 0.02338031865656376 2023-01-22 07:52:07.184227: step: 372/77, loss: 0.04273316636681557 2023-01-22 07:52:08.454751: step: 376/77, loss: 0.017735697329044342 2023-01-22 07:52:09.786814: step: 380/77, loss: 5.373685416998342e-05 2023-01-22 07:52:11.084254: step: 384/77, loss: 3.152326462441124e-05 2023-01-22 07:52:12.396074: step: 388/77, loss: 0.02466980367898941 ================================================== Loss: 0.022 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Chinese: {'template': {'p': 0.9324324324324325, 'r': 0.5433070866141733, 'f1': 0.6865671641791046}, 'slot': {'p': 0.6388888888888888, 'r': 0.019896193771626297, 'f1': 0.03859060402684564}, 'combined': 0.026495041570670144, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Korean: {'template': {'p': 0.918918918918919, 'r': 0.5354330708661418, 'f1': 0.6766169154228856}, 'slot': {'p': 0.6052631578947368, 'r': 0.019896193771626297, 'f1': 0.038525963149078725}, 'combined': 0.026067318349625408, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Russian: {'template': {'p': 0.9315068493150684, 'r': 0.5354330708661418, 'f1': 0.6799999999999999}, 'slot': {'p': 0.6216216216216216, 'r': 0.019896193771626297, 'f1': 0.03855825649622799}, 'combined': 0.02621961441743503, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 9 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:53:55.854845: step: 4/77, loss: 0.004985678009688854 2023-01-22 07:53:57.167702: step: 8/77, loss: 3.863613983412506e-06 2023-01-22 07:53:58.465439: step: 12/77, loss: 0.0014464558335021138 2023-01-22 07:53:59.713304: step: 16/77, loss: 0.005553169641643763 2023-01-22 07:54:01.028667: step: 20/77, loss: 0.0022822213359177113 2023-01-22 07:54:02.330330: step: 24/77, loss: 0.01146892923861742 2023-01-22 07:54:03.611181: step: 28/77, loss: 
0.005904552526772022 2023-01-22 07:54:04.910700: step: 32/77, loss: 0.09429851174354553 2023-01-22 07:54:06.215554: step: 36/77, loss: 0.0034883248154073954 2023-01-22 07:54:07.573304: step: 40/77, loss: 0.04095402732491493 2023-01-22 07:54:08.884673: step: 44/77, loss: 0.001694257603958249 2023-01-22 07:54:10.183466: step: 48/77, loss: 0.030682552605867386 2023-01-22 07:54:11.488945: step: 52/77, loss: 0.003378424560651183 2023-01-22 07:54:12.814944: step: 56/77, loss: 0.02623351290822029 2023-01-22 07:54:14.145936: step: 60/77, loss: 0.017934948205947876 2023-01-22 07:54:15.404669: step: 64/77, loss: 0.004223274532705545 2023-01-22 07:54:16.694185: step: 68/77, loss: 0.002698099473491311 2023-01-22 07:54:18.006858: step: 72/77, loss: 0.00402219919487834 2023-01-22 07:54:19.297105: step: 76/77, loss: 0.013955757021903992 2023-01-22 07:54:20.655753: step: 80/77, loss: 0.04232095927000046 2023-01-22 07:54:21.893869: step: 84/77, loss: 0.024030989035964012 2023-01-22 07:54:23.190197: step: 88/77, loss: 0.017762908712029457 2023-01-22 07:54:24.509303: step: 92/77, loss: 0.008113443851470947 2023-01-22 07:54:25.777570: step: 96/77, loss: 0.005497557111084461 2023-01-22 07:54:27.066106: step: 100/77, loss: 0.014417735859751701 2023-01-22 07:54:28.384736: step: 104/77, loss: 0.03320337459445 2023-01-22 07:54:29.724926: step: 108/77, loss: 0.06536999344825745 2023-01-22 07:54:30.975716: step: 112/77, loss: 0.006031247787177563 2023-01-22 07:54:32.282952: step: 116/77, loss: 0.011752675287425518 2023-01-22 07:54:33.557864: step: 120/77, loss: 0.002265932969748974 2023-01-22 07:54:34.841886: step: 124/77, loss: 0.03896312415599823 2023-01-22 07:54:36.135079: step: 128/77, loss: 0.02578730881214142 2023-01-22 07:54:37.444389: step: 132/77, loss: 0.013655617833137512 2023-01-22 07:54:38.761612: step: 136/77, loss: 0.0001007022219710052 2023-01-22 07:54:40.042966: step: 140/77, loss: 0.02868984267115593 2023-01-22 07:54:41.404457: step: 144/77, loss: 0.0007975643966346979 2023-01-22 07:54:42.694930: step: 148/77, loss: 0.000653879891615361 2023-01-22 07:54:44.027223: step: 152/77, loss: 0.0015032761730253696 2023-01-22 07:54:45.332628: step: 156/77, loss: 0.007357191760092974 2023-01-22 07:54:46.616705: step: 160/77, loss: 0.010839682072401047 2023-01-22 07:54:47.905856: step: 164/77, loss: 0.008317798376083374 2023-01-22 07:54:49.225415: step: 168/77, loss: 0.00936589390039444 2023-01-22 07:54:50.569853: step: 172/77, loss: 0.007547073066234589 2023-01-22 07:54:51.870888: step: 176/77, loss: 0.005637632217258215 2023-01-22 07:54:53.146458: step: 180/77, loss: 0.010725868865847588 2023-01-22 07:54:54.416298: step: 184/77, loss: 0.027186449617147446 2023-01-22 07:54:55.723624: step: 188/77, loss: 0.004381542094051838 2023-01-22 07:54:57.025714: step: 192/77, loss: 0.017316747456789017 2023-01-22 07:54:58.302083: step: 196/77, loss: 0.009244048036634922 2023-01-22 07:54:59.621185: step: 200/77, loss: 0.011271592229604721 2023-01-22 07:55:00.962218: step: 204/77, loss: 0.023467810824513435 2023-01-22 07:55:02.301883: step: 208/77, loss: 0.01169417891651392 2023-01-22 07:55:03.553761: step: 212/77, loss: 0.018470294773578644 2023-01-22 07:55:04.833836: step: 216/77, loss: 0.0076330117881298065 2023-01-22 07:55:06.126836: step: 220/77, loss: 0.04853637143969536 2023-01-22 07:55:07.432734: step: 224/77, loss: 0.026607543230056763 2023-01-22 07:55:08.754637: step: 228/77, loss: 0.013537176884710789 2023-01-22 07:55:10.000818: step: 232/77, loss: 0.02130974270403385 2023-01-22 07:55:11.286005: step: 236/77, 
loss: 0.026897411793470383 2023-01-22 07:55:12.580132: step: 240/77, loss: 8.246965444413945e-05 2023-01-22 07:55:13.823015: step: 244/77, loss: 0.03548573702573776 2023-01-22 07:55:15.102315: step: 248/77, loss: 0.01436652708798647 2023-01-22 07:55:16.417906: step: 252/77, loss: 0.00015419725968968123 2023-01-22 07:55:17.760551: step: 256/77, loss: 0.030067767947912216 2023-01-22 07:55:19.062069: step: 260/77, loss: 0.03384627774357796 2023-01-22 07:55:20.327861: step: 264/77, loss: 0.009953079745173454 2023-01-22 07:55:21.593005: step: 268/77, loss: 0.08683308959007263 2023-01-22 07:55:22.915366: step: 272/77, loss: 0.05754532665014267 2023-01-22 07:55:24.181501: step: 276/77, loss: 0.001794222043827176 2023-01-22 07:55:25.474354: step: 280/77, loss: 0.011959494091570377 2023-01-22 07:55:26.776690: step: 284/77, loss: 0.013683785684406757 2023-01-22 07:55:28.076769: step: 288/77, loss: 0.0354127436876297 2023-01-22 07:55:29.379282: step: 292/77, loss: 0.00022566976258531213 2023-01-22 07:55:30.655489: step: 296/77, loss: 0.05970189720392227 2023-01-22 07:55:31.902186: step: 300/77, loss: 0.001040084520354867 2023-01-22 07:55:33.231009: step: 304/77, loss: 0.0011714230058714747 2023-01-22 07:55:34.520561: step: 308/77, loss: 0.0047460198402404785 2023-01-22 07:55:35.797671: step: 312/77, loss: 0.02429712750017643 2023-01-22 07:55:37.088973: step: 316/77, loss: 0.026674669235944748 2023-01-22 07:55:38.405071: step: 320/77, loss: 0.010548888705670834 2023-01-22 07:55:39.685734: step: 324/77, loss: 0.006413971073925495 2023-01-22 07:55:40.965754: step: 328/77, loss: 0.004589063581079245 2023-01-22 07:55:42.195517: step: 332/77, loss: 0.06987367570400238 2023-01-22 07:55:43.540469: step: 336/77, loss: 0.0022110692225396633 2023-01-22 07:55:44.797020: step: 340/77, loss: 0.017331529408693314 2023-01-22 07:55:46.098925: step: 344/77, loss: 0.010062351822853088 2023-01-22 07:55:47.381412: step: 348/77, loss: 0.031465794891119 2023-01-22 07:55:48.693658: step: 352/77, loss: 0.02157551422715187 2023-01-22 07:55:50.004056: step: 356/77, loss: 0.02133113332092762 2023-01-22 07:55:51.341462: step: 360/77, loss: 0.008301690220832825 2023-01-22 07:55:52.613787: step: 364/77, loss: 0.09201312810182571 2023-01-22 07:55:53.919652: step: 368/77, loss: 0.006819041445851326 2023-01-22 07:55:55.204705: step: 372/77, loss: 0.006157260853797197 2023-01-22 07:55:56.510330: step: 376/77, loss: 0.0027986792847514153 2023-01-22 07:55:57.805172: step: 380/77, loss: 0.003340411465615034 2023-01-22 07:55:59.096109: step: 384/77, loss: 0.026231326162815094 2023-01-22 07:56:00.403627: step: 388/77, loss: 0.006379453465342522 ================================================== Loss: 0.018 -------------------- Dev Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05107997664915353, 'epoch': 9} Test Chinese: {'template': {'p': 0.9090909090909091, 'r': 0.5511811023622047, 'f1': 0.6862745098039216}, 'slot': {'p': 0.5581395348837209, 'r': 0.020761245674740483, 'f1': 0.040033361134278564}, 'combined': 0.027473875288230386, 'epoch': 9} Dev Korean: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05107997664915353, 'epoch': 9} Test Korean: {'template': {'p': 0.9090909090909091, 'r': 0.5511811023622047, 'f1': 0.6862745098039216}, 'slot': 
{'p': 0.5365853658536586, 'r': 0.01903114186851211, 'f1': 0.036758563074352546}, 'combined': 0.025226464854947825, 'epoch': 9} Dev Russian: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.47619047619047616, 'r': 0.03780718336483932, 'f1': 0.07005253940455342}, 'combined': 0.05107997664915353, 'epoch': 9} Test Russian: {'template': {'p': 0.9090909090909091, 'r': 0.5511811023622047, 'f1': 0.6862745098039216}, 'slot': {'p': 0.5581395348837209, 'r': 0.020761245674740483, 'f1': 0.040033361134278564}, 'combined': 0.027473875288230386, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 10 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:57:43.805797: step: 4/77, loss: 0.034885916858911514 2023-01-22 07:57:45.044112: step: 8/77, loss: 0.025922216475009918 2023-01-22 07:57:46.399906: step: 12/77, loss: 
0.018966445699334145 2023-01-22 07:57:47.710007: step: 16/77, loss: 0.007525881752371788 2023-01-22 07:57:48.965011: step: 20/77, loss: 0.00297100399620831 2023-01-22 07:57:50.265133: step: 24/77, loss: 0.022766780108213425 2023-01-22 07:57:51.552805: step: 28/77, loss: 0.005847464315593243 2023-01-22 07:57:52.864595: step: 32/77, loss: 0.0022809680085629225 2023-01-22 07:57:54.122200: step: 36/77, loss: 0.0008194573456421494 2023-01-22 07:57:55.417355: step: 40/77, loss: 0.007935411296784878 2023-01-22 07:57:56.715868: step: 44/77, loss: 0.01920030452311039 2023-01-22 07:57:58.013429: step: 48/77, loss: 0.02657415345311165 2023-01-22 07:57:59.308039: step: 52/77, loss: 0.0011227937648072839 2023-01-22 07:58:00.600250: step: 56/77, loss: 0.0020502624101936817 2023-01-22 07:58:01.877695: step: 60/77, loss: 0.04245453327894211 2023-01-22 07:58:03.212945: step: 64/77, loss: 0.0015092970570549369 2023-01-22 07:58:04.439949: step: 68/77, loss: 0.022562090307474136 2023-01-22 07:58:05.783086: step: 72/77, loss: 0.02246209979057312 2023-01-22 07:58:07.056999: step: 76/77, loss: 0.00920990388840437 2023-01-22 07:58:08.313211: step: 80/77, loss: 8.592795893491711e-06 2023-01-22 07:58:09.578942: step: 84/77, loss: 0.0023487601429224014 2023-01-22 07:58:10.899695: step: 88/77, loss: 0.010793939232826233 2023-01-22 07:58:12.197998: step: 92/77, loss: 0.11554623395204544 2023-01-22 07:58:13.439522: step: 96/77, loss: 0.01655627228319645 2023-01-22 07:58:14.706238: step: 100/77, loss: 0.01300885435193777 2023-01-22 07:58:15.961883: step: 104/77, loss: 0.028351565822958946 2023-01-22 07:58:17.299507: step: 108/77, loss: 0.0009378978866152465 2023-01-22 07:58:18.569980: step: 112/77, loss: 0.02903454378247261 2023-01-22 07:58:19.843914: step: 116/77, loss: 0.000774328364059329 2023-01-22 07:58:21.137463: step: 120/77, loss: 0.00030405400320887566 2023-01-22 07:58:22.446137: step: 124/77, loss: 0.007459406740963459 2023-01-22 07:58:23.730512: step: 128/77, loss: 0.031962983310222626 2023-01-22 07:58:25.079983: step: 132/77, loss: 0.0007741327863186598 2023-01-22 07:58:26.402482: step: 136/77, loss: 0.001975886756554246 2023-01-22 07:58:27.740049: step: 140/77, loss: 0.0006277412758208811 2023-01-22 07:58:29.058389: step: 144/77, loss: 0.00018086688942275941 2023-01-22 07:58:30.369577: step: 148/77, loss: 0.025780564174056053 2023-01-22 07:58:31.648955: step: 152/77, loss: 0.0059568691067397594 2023-01-22 07:58:32.944884: step: 156/77, loss: 0.0019144975813105702 2023-01-22 07:58:34.235634: step: 160/77, loss: 0.031837016344070435 2023-01-22 07:58:35.598564: step: 164/77, loss: 0.0005284567596390843 2023-01-22 07:58:36.892041: step: 168/77, loss: 0.006015297025442123 2023-01-22 07:58:38.225507: step: 172/77, loss: 0.0005159162101335824 2023-01-22 07:58:39.530958: step: 176/77, loss: 0.03496188297867775 2023-01-22 07:58:40.835404: step: 180/77, loss: 0.021903792396187782 2023-01-22 07:58:42.125056: step: 184/77, loss: 0.00761670945212245 2023-01-22 07:58:43.418201: step: 188/77, loss: 0.02137245237827301 2023-01-22 07:58:44.693501: step: 192/77, loss: 0.007971450686454773 2023-01-22 07:58:45.970182: step: 196/77, loss: 0.005901147145777941 2023-01-22 07:58:47.338976: step: 200/77, loss: 0.014218917116522789 2023-01-22 07:58:48.633458: step: 204/77, loss: 0.021402429789304733 2023-01-22 07:58:49.909447: step: 208/77, loss: 0.04705355688929558 2023-01-22 07:58:51.195621: step: 212/77, loss: 0.04193755239248276 2023-01-22 07:58:52.479890: step: 216/77, loss: 0.009806877002120018 2023-01-22 07:58:53.783784: 
step: 220/77, loss: 0.012827225029468536 2023-01-22 07:58:55.088988: step: 224/77, loss: 0.016613418236374855 2023-01-22 07:58:56.427598: step: 228/77, loss: 0.04089561104774475 2023-01-22 07:58:57.728506: step: 232/77, loss: 0.022792337462306023 2023-01-22 07:58:59.007577: step: 236/77, loss: 0.0008552936487831175 2023-01-22 07:59:00.304551: step: 240/77, loss: 0.01568450964987278 2023-01-22 07:59:01.604243: step: 244/77, loss: 0.007928731851279736 2023-01-22 07:59:02.880571: step: 248/77, loss: 3.1405874324264005e-05 2023-01-22 07:59:04.191835: step: 252/77, loss: 0.006512498948723078 2023-01-22 07:59:05.485659: step: 256/77, loss: 0.00061405188171193 2023-01-22 07:59:06.826748: step: 260/77, loss: 0.010790829546749592 2023-01-22 07:59:08.208611: step: 264/77, loss: 0.015889519825577736 2023-01-22 07:59:09.535521: step: 268/77, loss: 0.032671183347702026 2023-01-22 07:59:10.778338: step: 272/77, loss: 0.08655279874801636 2023-01-22 07:59:12.097548: step: 276/77, loss: 0.009039109572768211 2023-01-22 07:59:13.333820: step: 280/77, loss: 0.010499601252377033 2023-01-22 07:59:14.667472: step: 284/77, loss: 0.0036574043333530426 2023-01-22 07:59:15.992544: step: 288/77, loss: 0.022612404078245163 2023-01-22 07:59:17.273444: step: 292/77, loss: 0.000292752287350595 2023-01-22 07:59:18.512084: step: 296/77, loss: 0.0065917023457586765 2023-01-22 07:59:19.802527: step: 300/77, loss: 0.0009956855792552233 2023-01-22 07:59:21.129961: step: 304/77, loss: 0.006485821679234505 2023-01-22 07:59:22.383955: step: 308/77, loss: 0.060920167714357376 2023-01-22 07:59:23.655781: step: 312/77, loss: 0.003009687177836895 2023-01-22 07:59:24.942201: step: 316/77, loss: 0.09430146217346191 2023-01-22 07:59:26.272724: step: 320/77, loss: 0.013427263125777245 2023-01-22 07:59:27.545528: step: 324/77, loss: 0.041191231459379196 2023-01-22 07:59:28.841456: step: 328/77, loss: 0.05467259883880615 2023-01-22 07:59:30.104753: step: 332/77, loss: 0.0028161683585494757 2023-01-22 07:59:31.390607: step: 336/77, loss: 0.01637094095349312 2023-01-22 07:59:32.680228: step: 340/77, loss: 0.05374343320727348 2023-01-22 07:59:34.006635: step: 344/77, loss: 0.024343477562069893 2023-01-22 07:59:35.273093: step: 348/77, loss: 0.020151842385530472 2023-01-22 07:59:36.619843: step: 352/77, loss: 0.06068253517150879 2023-01-22 07:59:37.941630: step: 356/77, loss: 0.01416888926178217 2023-01-22 07:59:39.260388: step: 360/77, loss: 0.025863591581583023 2023-01-22 07:59:40.562511: step: 364/77, loss: 0.006620073691010475 2023-01-22 07:59:41.867944: step: 368/77, loss: 0.007929705083370209 2023-01-22 07:59:43.181949: step: 372/77, loss: 2.493147985660471e-05 2023-01-22 07:59:44.526786: step: 376/77, loss: 0.07580535858869553 2023-01-22 07:59:45.863217: step: 380/77, loss: 0.006525078788399696 2023-01-22 07:59:47.134269: step: 384/77, loss: 0.004044658504426479 2023-01-22 07:59:48.415430: step: 388/77, loss: 0.14865602552890778 ================================================== Loss: 0.020 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04988944951527864, 'epoch': 10} Test Chinese: {'template': {'p': 0.9565217391304348, 'r': 0.5196850393700787, 'f1': 0.673469387755102}, 'slot': {'p': 0.5714285714285714, 'r': 0.01730103806228374, 'f1': 0.033585222502099076}, 'combined': 0.02261861923610754, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04988944951527864, 'epoch': 10} Test Korean: {'template': {'p': 0.9565217391304348, 'r': 0.5196850393700787, 'f1': 0.673469387755102}, 'slot': {'p': 0.5714285714285714, 'r': 0.01730103806228374, 'f1': 0.033585222502099076}, 'combined': 0.02261861923610754, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04988944951527864, 'epoch': 10} Test Russian: {'template': {'p': 0.9571428571428572, 'r': 0.5275590551181102, 'f1': 0.680203045685279}, 'slot': {'p': 0.5714285714285714, 'r': 0.01730103806228374, 'f1': 0.033585222502099076}, 'combined': 0.022844770635945557, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 11 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 
08:01:31.787063: step: 4/77, loss: 0.034290995448827744 2023-01-22 08:01:33.131605: step: 8/77, loss: 0.003060400253161788 2023-01-22 08:01:34.428699: step: 12/77, loss: 0.016873084008693695 2023-01-22 08:01:35.758189: step: 16/77, loss: 0.044476039707660675 2023-01-22 08:01:37.052141: step: 20/77, loss: 0.1535889059305191 2023-01-22 08:01:38.345031: step: 24/77, loss: 0.02128823660314083 2023-01-22 08:01:39.651520: step: 28/77, loss: 0.005422608926892281 2023-01-22 08:01:40.979902: step: 32/77, loss: 0.0096100103110075 2023-01-22 08:01:42.291879: step: 36/77, loss: 0.043695032596588135 2023-01-22 08:01:43.529486: step: 40/77, loss: 0.07770724594593048 2023-01-22 08:01:44.807434: step: 44/77, loss: 0.0007819668389856815 2023-01-22 08:01:46.105744: step: 48/77, loss: 0.014020076021552086 2023-01-22 08:01:47.347646: step: 52/77, loss: 0.008628785610198975 2023-01-22 08:01:48.652327: step: 56/77, loss: 0.0005642471369355917 2023-01-22 08:01:50.000949: step: 60/77, loss: 0.006489362567663193 2023-01-22 08:01:51.323842: step: 64/77, loss: 0.0010391109390184283 2023-01-22 08:01:52.597102: step: 68/77, loss: 0.0034049940295517445 2023-01-22 08:01:53.934799: step: 72/77, loss: 0.021129030734300613 2023-01-22 08:01:55.252709: step: 76/77, loss: 0.000404253660235554 2023-01-22 08:01:56.538588: step: 80/77, loss: 0.029367826879024506 2023-01-22 08:01:57.818883: step: 84/77, loss: 0.000733956869225949 2023-01-22 08:01:59.135579: step: 88/77, loss: 0.015665430575609207 2023-01-22 08:02:00.437983: step: 92/77, loss: 0.0027978597208857536 2023-01-22 08:02:01.773690: step: 96/77, loss: 0.04804677516222 2023-01-22 08:02:03.039234: step: 100/77, loss: 0.006995899602770805 2023-01-22 08:02:04.309693: step: 104/77, loss: 0.0016960708890110254 2023-01-22 08:02:05.588754: step: 108/77, loss: 0.02806232124567032 2023-01-22 08:02:06.908167: step: 112/77, loss: 0.040813934057950974 2023-01-22 08:02:08.211393: step: 116/77, loss: 0.04757900536060333 2023-01-22 08:02:09.486668: step: 120/77, loss: 0.06963416934013367 2023-01-22 08:02:10.750720: step: 124/77, loss: 0.008742544800043106 2023-01-22 08:02:12.010676: step: 128/77, loss: 0.0003210227587260306 2023-01-22 08:02:13.330250: step: 132/77, loss: 0.03588179498910904 2023-01-22 08:02:14.635151: step: 136/77, loss: 0.00010144505358766764 2023-01-22 08:02:15.919335: step: 140/77, loss: 0.028663935139775276 2023-01-22 08:02:17.200302: step: 144/77, loss: 0.004867684096097946 2023-01-22 08:02:18.478494: step: 148/77, loss: 0.006418164353817701 2023-01-22 08:02:19.792652: step: 152/77, loss: 0.010072818025946617 2023-01-22 08:02:21.111041: step: 156/77, loss: 0.0030261133797466755 2023-01-22 08:02:22.428296: step: 160/77, loss: 0.012433314695954323 2023-01-22 08:02:23.729473: step: 164/77, loss: 1.3659241631103214e-05 2023-01-22 08:02:25.008450: step: 168/77, loss: 0.010078934952616692 2023-01-22 08:02:26.319671: step: 172/77, loss: 0.008565003052353859 2023-01-22 08:02:27.623563: step: 176/77, loss: 0.0013229025062173605 2023-01-22 08:02:28.922749: step: 180/77, loss: 0.0023587632458657026 2023-01-22 08:02:30.232649: step: 184/77, loss: 0.015727929770946503 2023-01-22 08:02:31.532434: step: 188/77, loss: 0.0003538941964507103 2023-01-22 08:02:32.810176: step: 192/77, loss: 0.005967576522380114 2023-01-22 08:02:34.059551: step: 196/77, loss: 0.0034459210000932217 2023-01-22 08:02:35.331878: step: 200/77, loss: 0.0008628435316495597 2023-01-22 08:02:36.636745: step: 204/77, loss: 0.003831970738247037 2023-01-22 08:02:37.933884: step: 208/77, loss: 0.04347836226224899 
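
For reading the evaluation blocks: each 'f1' is the standard harmonic mean of the listed 'p' and 'r', and the 'combined' value is numerically the product of the template F1 and the slot F1. A quick check against the recurring epoch-2 dev figures (pure arithmetic, no project code assumed):

def f1(p, r):
    return 0.0 if p + r == 0.0 else 2.0 * p * r / (p + r)

template_f1 = f1(1.0, 0.5833333333333334)   # 0.7368421052631579
slot_f1 = f1(0.5, 0.03780718336483932)      # 0.07029876977152899
print(template_f1 * slot_f1)                # 0.05179909351586346

This also explains why a 0.0 slot F1 (as in the Korean samples) zeroes out the combined score no matter how good the template score is.
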
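The repeated "Current best result" blocks keep citing epoch 2 because no later epoch has beaten its dev 'combined' score; the tracker appears to keep the argmax over epochs. A sketch of that selection rule, using dev scores copied from this log:

history = [
    {'epoch': 2, 'combined': 0.05179909351586346},   # current best
    {'epoch': 9, 'combined': 0.05107997664915353},
    {'epoch': 10, 'combined': 0.04988944951527864},
]
best = max(history, key=lambda r: r['combined'])
print(best)  # {'epoch': 2, ...} -- hence every block above still reports epoch 2
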
2023-01-22 08:02:39.291767: step: 212/77, loss: 0.03995771333575249 2023-01-22 08:02:40.571976: step: 216/77, loss: 0.08100616931915283 2023-01-22 08:02:41.900940: step: 220/77, loss: 0.0014499538810923696 2023-01-22 08:02:43.165912: step: 224/77, loss: 0.016846509650349617 2023-01-22 08:02:44.532908: step: 228/77, loss: 0.001104260329157114 2023-01-22 08:02:45.798110: step: 232/77, loss: 0.007652912754565477 2023-01-22 08:02:47.146969: step: 236/77, loss: 0.00373618071898818 2023-01-22 08:02:48.473970: step: 240/77, loss: 0.03783169016242027 2023-01-22 08:02:49.758362: step: 244/77, loss: 0.01822231523692608 2023-01-22 08:02:51.129548: step: 248/77, loss: 0.00047604707651771605 2023-01-22 08:02:52.431882: step: 252/77, loss: 0.04717876762151718 2023-01-22 08:02:53.733654: step: 256/77, loss: 0.00917772762477398 2023-01-22 08:02:54.995966: step: 260/77, loss: 0.008899427019059658 2023-01-22 08:02:56.269602: step: 264/77, loss: 0.0171928983181715 2023-01-22 08:02:57.555941: step: 268/77, loss: 0.0008396904449909925 2023-01-22 08:02:58.864729: step: 272/77, loss: 0.0042204721830785275 2023-01-22 08:03:00.170887: step: 276/77, loss: 0.09980832785367966 2023-01-22 08:03:01.478032: step: 280/77, loss: 0.02319612167775631 2023-01-22 08:03:02.771528: step: 284/77, loss: 0.045040421187877655 2023-01-22 08:03:04.074867: step: 288/77, loss: 1.880646959762089e-05 2023-01-22 08:03:05.339479: step: 292/77, loss: 0.04108387976884842 2023-01-22 08:03:06.620350: step: 296/77, loss: 0.012998824939131737 2023-01-22 08:03:07.874719: step: 300/77, loss: 0.007574758492410183 2023-01-22 08:03:09.182489: step: 304/77, loss: 0.025194745510816574 2023-01-22 08:03:10.438885: step: 308/77, loss: 0.0016742986626923084 2023-01-22 08:03:11.760698: step: 312/77, loss: 0.006551905535161495 2023-01-22 08:03:13.094285: step: 316/77, loss: 0.0009671220905147493 2023-01-22 08:03:14.450129: step: 320/77, loss: 0.01129376981407404 2023-01-22 08:03:15.760043: step: 324/77, loss: 0.028160715475678444 2023-01-22 08:03:17.033797: step: 328/77, loss: 0.008353251032531261 2023-01-22 08:03:18.349106: step: 332/77, loss: 0.012021275237202644 2023-01-22 08:03:19.686229: step: 336/77, loss: 0.005083122290670872 2023-01-22 08:03:21.007611: step: 340/77, loss: 0.011651956476271152 2023-01-22 08:03:22.255267: step: 344/77, loss: 0.008507365360856056 2023-01-22 08:03:23.523363: step: 348/77, loss: 3.128040407318622e-05 2023-01-22 08:03:24.794180: step: 352/77, loss: 0.003318456234410405 2023-01-22 08:03:26.129767: step: 356/77, loss: 0.022203654050827026 2023-01-22 08:03:27.407606: step: 360/77, loss: 0.007937486283481121 2023-01-22 08:03:28.663626: step: 364/77, loss: 0.01284043863415718 2023-01-22 08:03:29.966172: step: 368/77, loss: 0.001803778694011271 2023-01-22 08:03:31.256858: step: 372/77, loss: 0.03172057494521141 2023-01-22 08:03:32.558324: step: 376/77, loss: 0.009788584895431995 2023-01-22 08:03:33.843978: step: 380/77, loss: 0.020835664123296738 2023-01-22 08:03:35.158531: step: 384/77, loss: 0.012116208672523499 2023-01-22 08:03:36.436798: step: 388/77, loss: 0.018130645155906677 ================================================== Loss: 0.018 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Chinese: {'template': {'p': 0.9240506329113924, 'r': 0.5748031496062992, 'f1': 0.7087378640776699}, 'slot': {'p': 0.574468085106383, 'r': 
0.023356401384083045, 'f1': 0.044887780548628436}, 'combined': 0.031813669709222094, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Korean: {'template': {'p': 0.9012345679012346, 'r': 0.5748031496062992, 'f1': 0.701923076923077}, 'slot': {'p': 0.574468085106383, 'r': 0.023356401384083045, 'f1': 0.044887780548628436}, 'combined': 0.03150776903894112, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Russian: {'template': {'p': 0.9240506329113924, 'r': 0.5748031496062992, 'f1': 0.7087378640776699}, 'slot': {'p': 0.574468085106383, 'r': 0.023356401384083045, 'f1': 0.044887780548628436}, 'combined': 0.031813669709222094, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 12 command: python train.py 
--model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:05:19.876599: step: 4/77, loss: 0.009073804132640362 2023-01-22 08:05:21.184432: step: 8/77, loss: 0.009715680964291096 2023-01-22 08:05:22.472239: step: 12/77, loss: 0.06424564123153687 2023-01-22 08:05:23.756893: step: 16/77, loss: 0.06681496649980545 2023-01-22 08:05:25.015411: step: 20/77, loss: 0.002531634643673897 2023-01-22 08:05:26.276313: step: 24/77, loss: 0.0005347937112674117 2023-01-22 08:05:27.576109: step: 28/77, loss: 0.04869755357503891 2023-01-22 08:05:28.908239: step: 32/77, loss: 0.051517218351364136 2023-01-22 08:05:30.205836: step: 36/77, loss: 0.008260335773229599 2023-01-22 08:05:31.498186: step: 40/77, loss: 3.5608525649877265e-05 2023-01-22 08:05:32.784328: step: 44/77, loss: 0.03319783881306648 2023-01-22 08:05:34.099695: step: 48/77, loss: 0.0017925648717209697 2023-01-22 08:05:35.382469: step: 52/77, loss: 0.00929616391658783 2023-01-22 08:05:36.663842: step: 56/77, loss: 0.0027852028142660856 2023-01-22 08:05:37.961337: step: 60/77, loss: 0.020751886069774628 2023-01-22 08:05:39.270514: step: 64/77, loss: 0.0002482630079612136 2023-01-22 08:05:40.588257: step: 68/77, loss: 0.0005034186178818345 2023-01-22 08:05:41.891075: step: 72/77, loss: 0.00035241639125160873 2023-01-22 08:05:43.160979: step: 76/77, loss: 0.00031961730564944446 2023-01-22 08:05:44.485447: step: 80/77, loss: 0.007452554069459438 2023-01-22 08:05:45.784252: step: 84/77, loss: 0.007456920575350523 2023-01-22 08:05:47.068663: step: 88/77, loss: 0.009758615866303444 2023-01-22 08:05:48.367956: step: 92/77, loss: 0.0012029914651066065 2023-01-22 08:05:49.642645: step: 96/77, loss: 3.199763159500435e-05 2023-01-22 08:05:50.912339: step: 100/77, loss: 0.04619689658284187 2023-01-22 08:05:52.239268: step: 104/77, loss: 0.009478705003857613 2023-01-22 08:05:53.608186: step: 108/77, loss: 8.02664362709038e-05 2023-01-22 08:05:54.921794: step: 112/77, loss: 0.004843482282012701 2023-01-22 08:05:56.192564: step: 116/77, loss: 0.003468831069767475 2023-01-22 08:05:57.465351: step: 120/77, loss: 0.02320878580212593 2023-01-22 08:05:58.799730: step: 124/77, loss: 5.919792329223128e-06 2023-01-22 08:06:00.074855: step: 128/77, loss: 0.034282077103853226 2023-01-22 08:06:01.370624: step: 132/77, loss: 0.0011821096995845437 2023-01-22 08:06:02.687841: step: 136/77, loss: 0.030070718377828598 2023-01-22 08:06:03.964490: step: 140/77, loss: 0.004016416613012552 2023-01-22 08:06:05.233991: step: 144/77, loss: 0.005289752967655659 2023-01-22 08:06:06.519869: step: 148/77, loss: 0.001714541227556765 2023-01-22 08:06:07.825187: step: 152/77, loss: 0.001026333775371313 2023-01-22 08:06:09.141230: step: 156/77, loss: 7.020994871709263e-06 2023-01-22 08:06:10.440688: step: 160/77, loss: 0.0012839919654652476 2023-01-22 08:06:11.736844: step: 164/77, loss: 0.00011994199303444475 2023-01-22 08:06:13.021716: step: 168/77, loss: 0.0008939295657910407 2023-01-22 08:06:14.350379: step: 172/77, loss: 0.00872259121388197 2023-01-22 08:06:15.673050: step: 176/77, loss: 9.06147306523053e-06 2023-01-22 08:06:16.974462: step: 180/77, loss: 0.007759299129247665 2023-01-22 08:06:18.262971: step: 184/77, loss: 0.08365102112293243 2023-01-22 08:06:19.557513: step: 188/77, loss: 0.010917061939835548 2023-01-22 08:06:20.818887: step: 192/77, loss: 0.008356427773833275 2023-01-22 08:06:22.080448: step: 196/77, loss: 
0.0070846471935510635 2023-01-22 08:06:23.360057: step: 200/77, loss: 0.018262486904859543 2023-01-22 08:06:24.627860: step: 204/77, loss: 0.019870104268193245 2023-01-22 08:06:25.916736: step: 208/77, loss: 5.0123155233450234e-05 2023-01-22 08:06:27.201081: step: 212/77, loss: 0.03604806587100029 2023-01-22 08:06:28.470116: step: 216/77, loss: 0.0016053176950663328 2023-01-22 08:06:29.798362: step: 220/77, loss: 0.03923279047012329 2023-01-22 08:06:31.098198: step: 224/77, loss: 0.1039530336856842 2023-01-22 08:06:32.402747: step: 228/77, loss: 0.0012448562774807215 2023-01-22 08:06:33.704234: step: 232/77, loss: 0.0024955361150205135 2023-01-22 08:06:34.963309: step: 236/77, loss: 0.009963253512978554 2023-01-22 08:06:36.248912: step: 240/77, loss: 0.04958149790763855 2023-01-22 08:06:37.562899: step: 244/77, loss: 0.0008791973232291639 2023-01-22 08:06:38.919004: step: 248/77, loss: 0.05147349089384079 2023-01-22 08:06:40.254987: step: 252/77, loss: 0.00014931612531654537 2023-01-22 08:06:41.526477: step: 256/77, loss: 0.016675325110554695 2023-01-22 08:06:42.814140: step: 260/77, loss: 0.014337164349853992 2023-01-22 08:06:44.132660: step: 264/77, loss: 0.018950382247567177 2023-01-22 08:06:45.445535: step: 268/77, loss: 7.146938150981441e-05 2023-01-22 08:06:46.724847: step: 272/77, loss: 0.017762595787644386 2023-01-22 08:06:48.085780: step: 276/77, loss: 0.00015441945288330317 2023-01-22 08:06:49.401764: step: 280/77, loss: 0.0028129005804657936 2023-01-22 08:06:50.765906: step: 284/77, loss: 0.14359217882156372 2023-01-22 08:06:52.065744: step: 288/77, loss: 0.0016476346645504236 2023-01-22 08:06:53.438087: step: 292/77, loss: 0.0030373530462384224 2023-01-22 08:06:54.768709: step: 296/77, loss: 0.001313269603997469 2023-01-22 08:06:56.061887: step: 300/77, loss: 0.026861613616347313 2023-01-22 08:06:57.319493: step: 304/77, loss: 0.01838192157447338 2023-01-22 08:06:58.622720: step: 308/77, loss: 0.009071671403944492 2023-01-22 08:06:59.900002: step: 312/77, loss: 0.018952257931232452 2023-01-22 08:07:01.163855: step: 316/77, loss: 0.02215559035539627 2023-01-22 08:07:02.504726: step: 320/77, loss: 0.0001870796550065279 2023-01-22 08:07:03.822342: step: 324/77, loss: 0.0033369308803230524 2023-01-22 08:07:05.129844: step: 328/77, loss: 0.015525397844612598 2023-01-22 08:07:06.423108: step: 332/77, loss: 0.003829201217740774 2023-01-22 08:07:07.712005: step: 336/77, loss: 0.011689679697155952 2023-01-22 08:07:09.010956: step: 340/77, loss: 0.00465412437915802 2023-01-22 08:07:10.317795: step: 344/77, loss: 0.010710624977946281 2023-01-22 08:07:11.627923: step: 348/77, loss: 0.005850036628544331 2023-01-22 08:07:12.943516: step: 352/77, loss: 0.0008918998064473271 2023-01-22 08:07:14.224515: step: 356/77, loss: 0.007814115844666958 2023-01-22 08:07:15.541647: step: 360/77, loss: 0.018171032890677452 2023-01-22 08:07:16.862630: step: 364/77, loss: 0.0254492349922657 2023-01-22 08:07:18.169564: step: 368/77, loss: 0.0004618678358383477 2023-01-22 08:07:19.407445: step: 372/77, loss: 0.03245954215526581 2023-01-22 08:07:20.738052: step: 376/77, loss: 0.013895172625780106 2023-01-22 08:07:22.012008: step: 380/77, loss: 0.00014311702398117632 2023-01-22 08:07:23.334491: step: 384/77, loss: 0.07297346740961075 2023-01-22 08:07:24.671620: step: 388/77, loss: 0.017625009641051292 ================================================== Loss: 0.016 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Chinese: {'template': {'p': 0.922077922077922, 'r': 0.5590551181102362, 'f1': 0.696078431372549}, 'slot': {'p': 0.5952380952380952, 'r': 0.02162629757785467, 'f1': 0.041736227045075125}, 'combined': 0.02905168745294445, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Korean: {'template': {'p': 0.922077922077922, 'r': 0.5590551181102362, 'f1': 0.696078431372549}, 'slot': {'p': 0.5813953488372093, 'r': 0.02162629757785467, 'f1': 0.041701417848206836}, 'combined': 0.02902745752179103, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Russian: {'template': {'p': 0.922077922077922, 'r': 0.5590551181102362, 'f1': 0.696078431372549}, 'slot': {'p': 0.5813953488372093, 'r': 0.02162629757785467, 'f1': 0.041701417848206836}, 'combined': 0.02902745752179103, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 
'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 13 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:09:08.106088: step: 4/77, loss: 0.011534569784998894 2023-01-22 08:09:09.372280: step: 8/77, loss: 7.703698065597564e-05 2023-01-22 08:09:10.637529: step: 12/77, loss: 0.0074141984805464745 2023-01-22 08:09:11.913788: step: 16/77, loss: 0.003941691946238279 2023-01-22 08:09:13.152371: step: 20/77, loss: 0.007250982336699963 2023-01-22 08:09:14.457992: step: 24/77, loss: 0.024784471839666367 2023-01-22 08:09:15.725295: step: 28/77, loss: 0.0064056264236569405 2023-01-22 08:09:17.075275: step: 32/77, loss: 0.0310469102114439 2023-01-22 08:09:18.381399: step: 36/77, loss: 0.017922699451446533 2023-01-22 08:09:19.618776: step: 40/77, loss: 0.01132173277437687 2023-01-22 08:09:20.962935: step: 44/77, loss: 0.0026819310151040554 2023-01-22 08:09:22.262537: step: 48/77, loss: 0.0073762321844697 2023-01-22 08:09:23.609523: step: 52/77, loss: 0.0018268930725753307 2023-01-22 08:09:24.933328: step: 56/77, loss: 0.012388748116791248 2023-01-22 08:09:26.236609: step: 60/77, loss: 0.0009196172468364239 2023-01-22 08:09:27.560590: step: 64/77, loss: 0.0004850560799241066 2023-01-22 08:09:28.924267: step: 68/77, loss: 0.03814251720905304 2023-01-22 08:09:30.218753: step: 72/77, loss: 0.024647535756230354 2023-01-22 08:09:31.491217: step: 76/77, loss: 0.006834662053734064 2023-01-22 08:09:32.773905: step: 80/77, loss: 0.014149656519293785 2023-01-22 08:09:34.089984: step: 84/77, loss: 0.029407711699604988 2023-01-22 08:09:35.334373: step: 88/77, loss: 0.054918814450502396 2023-01-22 08:09:36.590433: step: 92/77, loss: 0.019674647599458694 2023-01-22 08:09:37.892774: step: 96/77, loss: 0.0006954215932637453 2023-01-22 08:09:39.199479: step: 100/77, loss: 3.290568929514848e-05 2023-01-22 08:09:40.517445: step: 104/77, loss: 0.00822521187365055 2023-01-22 08:09:41.857177: step: 108/77, loss: 0.0008668411173857749 2023-01-22 08:09:43.152807: step: 112/77, loss: 0.017215151339769363 2023-01-22 08:09:44.510352: step: 116/77, loss: 0.0025849805679172277 2023-01-22 08:09:45.813840: step: 120/77, loss: 0.012226728722453117 2023-01-22 08:09:47.189490: step: 124/77, loss: 0.0035495597403496504 2023-01-22 08:09:48.433585: step: 128/77, loss: 0.005428432486951351 2023-01-22 08:09:49.703650: step: 132/77, loss: 0.008224151097238064 2023-01-22 08:09:50.969330: step: 136/77, loss: 0.039925575256347656 2023-01-22 08:09:52.321086: step: 140/77, loss: 0.02613389492034912 2023-01-22 08:09:53.603973: step: 144/77, loss: 0.004684390965849161 2023-01-22 08:09:54.893778: step: 148/77, loss: 0.0002262677444377914 2023-01-22 08:09:56.216063: step: 152/77, loss: 0.012023281306028366 2023-01-22 08:09:57.490355: step: 156/77, loss: 0.04951193183660507 2023-01-22 08:09:58.793603: step: 160/77, loss: 0.008101816289126873 2023-01-22 08:10:00.001711: step: 164/77, loss: 0.008501788601279259 2023-01-22 08:10:01.310825: step: 168/77, loss: 0.013952715322375298 2023-01-22 08:10:02.621495: step: 172/77, loss: 0.00011134293890791014 2023-01-22 08:10:03.950954: step: 176/77, loss: 0.004148687236011028 2023-01-22 08:10:05.222848: step: 180/77, loss: 0.0027915844693779945 2023-01-22 08:10:06.474141: step: 184/77, loss: 0.03257248178124428 2023-01-22 08:10:07.786030: step: 
188/77, loss: 0.014188706874847412 2023-01-22 08:10:09.083321: step: 192/77, loss: 0.0040221610106527805 2023-01-22 08:10:10.425963: step: 196/77, loss: 0.038398560136556625 2023-01-22 08:10:11.736309: step: 200/77, loss: 2.7238772872806294e-06 2023-01-22 08:10:13.066641: step: 204/77, loss: 0.005252287723124027 2023-01-22 08:10:14.373241: step: 208/77, loss: 0.00662798760458827 2023-01-22 08:10:15.656763: step: 212/77, loss: 0.0014862954849377275 2023-01-22 08:10:16.948366: step: 216/77, loss: 0.06251075118780136 2023-01-22 08:10:18.256889: step: 220/77, loss: 0.04855694621801376 2023-01-22 08:10:19.546972: step: 224/77, loss: 8.610729855718091e-05 2023-01-22 08:10:20.873208: step: 228/77, loss: 0.00452599348500371 2023-01-22 08:10:22.145377: step: 232/77, loss: 0.003628335427492857 2023-01-22 08:10:23.380614: step: 236/77, loss: 0.001405022805556655 2023-01-22 08:10:24.668689: step: 240/77, loss: 0.0004565988201647997 2023-01-22 08:10:25.943914: step: 244/77, loss: 5.832927854498848e-05 2023-01-22 08:10:27.258199: step: 248/77, loss: 0.001974995480850339 2023-01-22 08:10:28.558547: step: 252/77, loss: 0.006026091054081917 2023-01-22 08:10:29.887630: step: 256/77, loss: 0.0033578251022845507 2023-01-22 08:10:31.186805: step: 260/77, loss: 0.049207091331481934 2023-01-22 08:10:32.473909: step: 264/77, loss: 0.006522368639707565 2023-01-22 08:10:33.738443: step: 268/77, loss: 0.02521488070487976 2023-01-22 08:10:35.008061: step: 272/77, loss: 0.035877011716365814 2023-01-22 08:10:36.315945: step: 276/77, loss: 0.006761828437447548 2023-01-22 08:10:37.631925: step: 280/77, loss: 0.0008893448393791914 2023-01-22 08:10:38.940459: step: 284/77, loss: 0.07028082758188248 2023-01-22 08:10:40.208313: step: 288/77, loss: 1.4272099178924691e-05 2023-01-22 08:10:41.505637: step: 292/77, loss: 0.024424489587545395 2023-01-22 08:10:42.770152: step: 296/77, loss: 0.005749974399805069 2023-01-22 08:10:44.080785: step: 300/77, loss: 0.00023848118144087493 2023-01-22 08:10:45.371365: step: 304/77, loss: 0.0036856308579444885 2023-01-22 08:10:46.675724: step: 308/77, loss: 0.03618558123707771 2023-01-22 08:10:48.007296: step: 312/77, loss: 0.0694272518157959 2023-01-22 08:10:49.324198: step: 316/77, loss: 0.1101406142115593 2023-01-22 08:10:50.611049: step: 320/77, loss: 0.0017924606800079346 2023-01-22 08:10:51.887994: step: 324/77, loss: 0.0023241147864609957 2023-01-22 08:10:53.212286: step: 328/77, loss: 0.0022019895259290934 2023-01-22 08:10:54.518473: step: 332/77, loss: 0.0007412676350213587 2023-01-22 08:10:55.807726: step: 336/77, loss: 0.03281879797577858 2023-01-22 08:10:57.070690: step: 340/77, loss: 0.0045754555612802505 2023-01-22 08:10:58.349718: step: 344/77, loss: 0.009205389767885208 2023-01-22 08:10:59.631891: step: 348/77, loss: 0.032856326550245285 2023-01-22 08:11:00.941910: step: 352/77, loss: 0.0010040453635156155 2023-01-22 08:11:02.256379: step: 356/77, loss: 2.4541625407437095e-06 2023-01-22 08:11:03.531571: step: 360/77, loss: 0.026075957342982292 2023-01-22 08:11:04.764344: step: 364/77, loss: 0.0016539209755137563 2023-01-22 08:11:06.069730: step: 368/77, loss: 0.00030679808696731925 2023-01-22 08:11:07.333238: step: 372/77, loss: 0.0001105769770219922 2023-01-22 08:11:08.680867: step: 376/77, loss: 0.01804657094180584 2023-01-22 08:11:09.995761: step: 380/77, loss: 0.034142449498176575 2023-01-22 08:11:11.321426: step: 384/77, loss: 0.00044037040788680315 2023-01-22 08:11:12.590005: step: 388/77, loss: 0.00038718071300536394 ================================================== 
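Note on the scoring scheme (inferred from the logged values, not from train.py): the 'template' and 'slot' blocks report standard precision/recall/F1, and every 'combined' value checked equals the product of the template F1 and the slot F1. A minimal Python check against the epoch-13 dev numbers below:

# Inferred from the logged numbers, not from train.py:
# 'combined' = template_f1 * slot_f1, with standard F1 throughout.
def f1(p, r):
    return 2 * p * r / (p + r) if p + r else 0.0

template_f1 = f1(1.0, 0.5833333333333334)   # 0.7368421052631579, as logged
slot_f1 = f1(0.5, 0.03780718336483932)      # 0.07029876977152899, as logged
print(template_f1 * slot_f1)                # 0.05179909351586346, the logged 'combined'

The same identity holds for the test rows, e.g. 0.696078431372549 * 0.041736227045075125 = 0.02905168745294445 for Test Chinese at epoch 12. Note also that the 'Current best result' block keeps epoch 2 even when a later epoch ties its dev 'combined' score (as epoch 13 does), so the best checkpoint is presumably only replaced on a strict improvement.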
Loss: 0.015 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Chinese: {'template': {'p': 0.9305555555555556, 'r': 0.5275590551181102, 'f1': 0.6733668341708542}, 'slot': {'p': 0.5853658536585366, 'r': 0.020761245674740483, 'f1': 0.040100250626566414}, 'combined': 0.02700217881386884, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Korean: {'template': {'p': 0.9295774647887324, 'r': 0.5196850393700787, 'f1': 0.6666666666666665}, 'slot': {'p': 0.5581395348837209, 'r': 0.020761245674740483, 'f1': 0.040033361134278564}, 'combined': 0.02668890742285237, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Russian: {'template': {'p': 0.9305555555555556, 'r': 0.5275590551181102, 'f1': 0.6733668341708542}, 'slot': {'p': 0.5581395348837209, 'r': 0.020761245674740483, 'f1': 0.040033361134278564}, 'combined': 0.026957137648207674, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 13} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 
'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 14 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:12:55.953963: step: 4/77, loss: 0.006818510126322508 2023-01-22 08:12:57.249188: step: 8/77, loss: 0.0006258913199417293 2023-01-22 08:12:58.547552: step: 12/77, loss: 0.0027833329513669014 2023-01-22 08:12:59.853147: step: 16/77, loss: 0.0074259317480027676 2023-01-22 08:13:01.097262: step: 20/77, loss: 0.0012211976572871208 2023-01-22 08:13:02.330730: step: 24/77, loss: 0.004635686054825783 2023-01-22 08:13:03.652265: step: 28/77, loss: 0.010179792530834675 2023-01-22 08:13:04.933404: step: 32/77, loss: 0.0019203039119020104 2023-01-22 08:13:06.173694: step: 36/77, loss: 0.005334170069545507 2023-01-22 08:13:07.452259: step: 40/77, loss: 0.013824529945850372 2023-01-22 08:13:08.739102: step: 44/77, loss: 0.004141363315284252 2023-01-22 08:13:10.067902: step: 48/77, loss: 4.5128668716643006e-05 2023-01-22 08:13:11.378553: step: 52/77, loss: 0.052282486110925674 2023-01-22 08:13:12.672385: step: 56/77, loss: 1.9176808564225212e-05 2023-01-22 08:13:13.921476: step: 60/77, loss: 0.020106812939047813 2023-01-22 08:13:15.173077: step: 64/77, loss: 0.00036743577220477164 2023-01-22 08:13:16.442084: step: 68/77, loss: 0.022626064717769623 2023-01-22 08:13:17.719264: step: 72/77, loss: 1.0763298632809892e-05 2023-01-22 08:13:18.989384: step: 76/77, loss: 0.0008758734329603612 2023-01-22 08:13:20.252715: step: 80/77, loss: 0.028549078851938248 2023-01-22 08:13:21.534209: step: 84/77, loss: 6.134075374575332e-05 2023-01-22 08:13:22.826463: step: 88/77, loss: 0.010253140702843666 2023-01-22 08:13:24.102958: step: 92/77, loss: 1.0152663890039548e-05 2023-01-22 08:13:25.429162: step: 96/77, loss: 0.009076571092009544 2023-01-22 08:13:26.734226: step: 100/77, loss: 0.011913309805095196 2023-01-22 08:13:28.007664: step: 104/77, loss: 0.004169612191617489 2023-01-22 08:13:29.323172: step: 108/77, loss: 0.0008429823210462928 2023-01-22 08:13:30.624609: step: 112/77, loss: 0.0046945675276219845 2023-01-22 08:13:31.917556: step: 116/77, loss: 0.08648164570331573 2023-01-22 08:13:33.214822: step: 120/77, loss: 0.022534571588039398 2023-01-22 08:13:34.490406: step: 124/77, loss: 0.006535988301038742 2023-01-22 08:13:35.818058: step: 128/77, loss: 0.00017252654652111232 2023-01-22 08:13:37.091744: step: 132/77, loss: 0.0002712097193580121 2023-01-22 08:13:38.358927: step: 136/77, loss: 0.006114703603088856 2023-01-22 08:13:39.670994: step: 140/77, loss: 0.00034197320928797126 2023-01-22 08:13:40.971016: step: 144/77, loss: 8.801784133538604e-05 2023-01-22 08:13:42.314977: step: 148/77, loss: 0.002975575625896454 2023-01-22 08:13:43.617619: step: 152/77, loss: 0.01886889897286892 2023-01-22 08:13:44.936589: step: 156/77, loss: 0.011135238222777843 2023-01-22 08:13:46.182812: step: 160/77, loss: 0.0024436095263808966 2023-01-22 08:13:47.458605: step: 164/77, loss: 0.0031774109229445457 2023-01-22 08:13:48.708971: step: 168/77, loss: 0.018518783152103424 2023-01-22 08:13:50.039833: step: 172/77, loss: 
0.0018485994078218937 2023-01-22 08:13:51.378874: step: 176/77, loss: 0.00010482324432814494 2023-01-22 08:13:52.708515: step: 180/77, loss: 0.0023094690404832363 2023-01-22 08:13:53.982157: step: 184/77, loss: 0.0215534046292305 2023-01-22 08:13:55.244027: step: 188/77, loss: 0.10720938444137573 2023-01-22 08:13:56.555617: step: 192/77, loss: 0.010321415960788727 2023-01-22 08:13:57.873795: step: 196/77, loss: 0.009301860816776752 2023-01-22 08:13:59.183427: step: 200/77, loss: 0.013922393321990967 2023-01-22 08:14:00.478039: step: 204/77, loss: 0.06962837278842926 2023-01-22 08:14:01.750537: step: 208/77, loss: 2.3917215003166348e-05 2023-01-22 08:14:03.028123: step: 212/77, loss: 0.0030422827694565058 2023-01-22 08:14:04.365195: step: 216/77, loss: 0.0008390427683480084 2023-01-22 08:14:05.674402: step: 220/77, loss: 0.000872556702233851 2023-01-22 08:14:06.943590: step: 224/77, loss: 0.002295607002452016 2023-01-22 08:14:08.207922: step: 228/77, loss: 0.00622314028441906 2023-01-22 08:14:09.489372: step: 232/77, loss: 0.020128877833485603 2023-01-22 08:14:10.831456: step: 236/77, loss: 0.019149256870150566 2023-01-22 08:14:12.214157: step: 240/77, loss: 0.0004515836189966649 2023-01-22 08:14:13.517575: step: 244/77, loss: 0.011811056174337864 2023-01-22 08:14:14.801398: step: 248/77, loss: 0.02650582231581211 2023-01-22 08:14:16.057697: step: 252/77, loss: 0.00040723313577473164 2023-01-22 08:14:17.362513: step: 256/77, loss: 0.0022488830145448446 2023-01-22 08:14:18.660727: step: 260/77, loss: 0.005055895075201988 2023-01-22 08:14:19.960307: step: 264/77, loss: 3.0174373932823073e-06 2023-01-22 08:14:21.296508: step: 268/77, loss: 0.0850355327129364 2023-01-22 08:14:22.541411: step: 272/77, loss: 0.005542317871004343 2023-01-22 08:14:23.852774: step: 276/77, loss: 0.0464678555727005 2023-01-22 08:14:25.201433: step: 280/77, loss: 0.0031086173839867115 2023-01-22 08:14:26.548220: step: 284/77, loss: 0.0017400861252099276 2023-01-22 08:14:27.859601: step: 288/77, loss: 0.003277476178482175 2023-01-22 08:14:29.159199: step: 292/77, loss: 0.0005285036750137806 2023-01-22 08:14:30.474158: step: 296/77, loss: 0.0001412014535162598 2023-01-22 08:14:31.836197: step: 300/77, loss: 0.029769254848361015 2023-01-22 08:14:33.147997: step: 304/77, loss: 0.016503766179084778 2023-01-22 08:14:34.509554: step: 308/77, loss: 0.002605894347652793 2023-01-22 08:14:35.856192: step: 312/77, loss: 0.09122447669506073 2023-01-22 08:14:37.134138: step: 316/77, loss: 0.0014124336885288358 2023-01-22 08:14:38.421817: step: 320/77, loss: 0.004125905688852072 2023-01-22 08:14:39.755872: step: 324/77, loss: 0.004153760150074959 2023-01-22 08:14:41.030356: step: 328/77, loss: 0.003013004781678319 2023-01-22 08:14:42.330723: step: 332/77, loss: 0.0016704755835235119 2023-01-22 08:14:43.627235: step: 336/77, loss: 0.008554182946681976 2023-01-22 08:14:44.891634: step: 340/77, loss: 0.005642848089337349 2023-01-22 08:14:46.188536: step: 344/77, loss: 0.026147441938519478 2023-01-22 08:14:47.509008: step: 348/77, loss: 0.0007405009819194674 2023-01-22 08:14:48.809966: step: 352/77, loss: 0.0007274311501532793 2023-01-22 08:14:50.068025: step: 356/77, loss: 0.002250226214528084 2023-01-22 08:14:51.408823: step: 360/77, loss: 0.007931672036647797 2023-01-22 08:14:52.693173: step: 364/77, loss: 0.005355477333068848 2023-01-22 08:14:53.996264: step: 368/77, loss: 0.0003642349038273096 2023-01-22 08:14:55.340559: step: 372/77, loss: 0.0006663693930022418 2023-01-22 08:14:56.631249: step: 376/77, loss: 0.05269888788461685 
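Note on the step counter (the training loop itself is not shown, so this is an inferred sketch): the logged step numbers advance by 4 and one loss line is printed per group, which is consistent with --accumulate_step 4 micro-batches of --batch_size 10 being accumulated into an effective batch of 40 per optimizer update. A generic PyTorch sketch of that pattern, with hypothetical names (train_epoch, model, data, opt) that are not from the repo:

import torch

def train_epoch(model, loader, optimizer, accumulate_step=4):
    model.train()
    optimizer.zero_grad()
    for i, (x, y) in enumerate(loader, start=1):
        loss = torch.nn.functional.mse_loss(model(x), y)  # stand-in loss
        (loss / accumulate_step).backward()  # scale so the group's gradients average
        if i % accumulate_step == 0:         # one update per 4 micro-batches
            optimizer.step()
            optimizer.zero_grad()
            print(f"step: {i}, loss: {loss.item():.6f}")  # one line per update, as in the log

# Tiny demo with random data: 8 micro-batches of size 10 -> 2 optimizer updates.
model = torch.nn.Linear(4, 1)
opt = torch.optim.SGD(model.parameters(), lr=0.1)
data = [(torch.randn(10, 4), torch.randn(10, 1)) for _ in range(8)]
train_epoch(model, data, opt)

The printed denominator in the real log (77) does not track the micro-batch counter, which runs to 388 per epoch here; whatever it counts in train.py is left as-is above.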
2023-01-22 08:14:57.942404: step: 380/77, loss: 0.0001794708805391565 2023-01-22 08:14:59.318007: step: 384/77, loss: 0.04961037263274193 2023-01-22 08:15:00.630368: step: 388/77, loss: 0.0022222367115318775 ================================================== Loss: 0.012 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Chinese: {'template': {'p': 0.935064935064935, 'r': 0.5669291338582677, 'f1': 0.7058823529411765}, 'slot': {'p': 0.7037037037037037, 'r': 0.01643598615916955, 'f1': 0.032121724429416736}, 'combined': 0.022674158420764756, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Korean: {'template': {'p': 0.9358974358974359, 'r': 0.5748031496062992, 'f1': 0.7121951219512195}, 'slot': {'p': 0.6785714285714286, 'r': 0.01643598615916955, 'f1': 0.03209459459459459}, 'combined': 0.022857613711272245, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Russian: {'template': {'p': 0.9230769230769231, 'r': 0.5669291338582677, 'f1': 0.7024390243902439}, 'slot': {'p': 0.7407407407407407, 'r': 0.01730103806228374, 'f1': 0.03381234150464919}, 'combined': 0.02375110817887553, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 15 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:16:44.059410: step: 4/77, loss: 0.0004816804139409214 2023-01-22 08:16:45.342408: step: 8/77, loss: 1.6693866200512275e-05 2023-01-22 08:16:46.637970: step: 12/77, loss: 4.607753726304509e-05 2023-01-22 08:16:47.949612: step: 16/77, loss: 0.0003652075829450041 2023-01-22 08:16:49.236358: step: 20/77, loss: 0.0014403036329895258 2023-01-22 08:16:50.489360: step: 24/77, loss: 0.0024174025747925043 2023-01-22 08:16:51.807675: step: 28/77, loss: 1.7359528783345013e-06 2023-01-22 08:16:53.177761: step: 32/77, loss: 0.04290907457470894 2023-01-22 08:16:54.458723: step: 36/77, loss: 0.0003589342813938856 2023-01-22 08:16:55.705432: step: 40/77, loss: 0.012305357493460178 2023-01-22 08:16:57.014442: step: 44/77, loss: 0.019339703023433685 2023-01-22 08:16:58.291517: step: 48/77, loss: 0.003062969772145152 2023-01-22 08:16:59.546018: step: 52/77, loss: 5.2768882596865296e-05 2023-01-22 08:17:00.810315: step: 56/77, loss: 0.006697559729218483 2023-01-22 08:17:02.081364: step: 60/77, loss: 0.0011638242285698652 2023-01-22 08:17:03.389879: step: 64/77, loss: 0.01967165619134903 2023-01-22 08:17:04.598770: step: 68/77, loss: 0.004847807809710503 2023-01-22 08:17:05.866367: step: 72/77, loss: 0.000611055816989392 2023-01-22 08:17:07.199038: step: 76/77, loss: 0.0048630512319505215 2023-01-22 08:17:08.492354: step: 80/77, loss: 0.054831236600875854 2023-01-22 08:17:09.780165: step: 84/77, loss: 0.01335857156664133 2023-01-22 08:17:11.106181: step: 88/77, loss: 0.020420288667082787 2023-01-22 08:17:12.384846: step: 92/77, loss: 0.0005267034866847098 2023-01-22 08:17:13.699194: step: 96/77, loss: 0.009028802625834942 2023-01-22 08:17:14.958530: step: 100/77, loss: 0.013945749960839748 2023-01-22 08:17:16.203078: step: 104/77, loss: 0.04236004129052162 2023-01-22 08:17:17.542203: step: 108/77, loss: 0.00033831584732979536 2023-01-22 08:17:18.846484: step: 112/77, loss: 5.3080308134667575e-05 2023-01-22 08:17:20.085594: step: 116/77, loss: 0.07516567409038544 2023-01-22 08:17:21.346914: step: 120/77, loss: 0.015899505466222763 2023-01-22 08:17:22.620561: step: 124/77, loss: 0.0003939236339647323 2023-01-22 08:17:23.927584: step: 128/77, loss: 0.004468400496989489 2023-01-22 08:17:25.235772: step: 132/77, loss: 0.005055863410234451 2023-01-22 08:17:26.581166: step: 136/77, loss: 0.0025805742479860783 2023-01-22 08:17:27.889666: step: 140/77, loss: 0.0005273033166304231 2023-01-22 08:17:29.204600: step: 144/77, loss: 0.060450442135334015 2023-01-22 08:17:30.485287: step: 148/77, loss: 0.018075956031680107 2023-01-22 08:17:31.791844: step: 152/77, loss: 0.06206171587109566 2023-01-22 08:17:33.118641: step: 156/77, loss: 0.001956725725904107 2023-01-22 08:17:34.431465: step: 160/77, loss: 0.0050781648606061935 2023-01-22 
08:17:35.699537: step: 164/77, loss: 0.004459173884242773 2023-01-22 08:17:36.994019: step: 168/77, loss: 0.04625125601887703 2023-01-22 08:17:38.291556: step: 172/77, loss: 0.0015759647358208895 2023-01-22 08:17:39.568170: step: 176/77, loss: 0.033126529306173325 2023-01-22 08:17:40.888419: step: 180/77, loss: 7.026261300779879e-05 2023-01-22 08:17:42.180484: step: 184/77, loss: 0.0015952292596921325 2023-01-22 08:17:43.462873: step: 188/77, loss: 0.03751169145107269 2023-01-22 08:17:44.737875: step: 192/77, loss: 0.009760575369000435 2023-01-22 08:17:46.060266: step: 196/77, loss: 0.02395324595272541 2023-01-22 08:17:47.338554: step: 200/77, loss: 0.07830063998699188 2023-01-22 08:17:48.657110: step: 204/77, loss: 6.735025090165436e-05 2023-01-22 08:17:49.948008: step: 208/77, loss: 0.002208677353337407 2023-01-22 08:17:51.239582: step: 212/77, loss: 0.006165114231407642 2023-01-22 08:17:52.587074: step: 216/77, loss: 0.005174754187464714 2023-01-22 08:17:53.927667: step: 220/77, loss: 0.0012870485661551356 2023-01-22 08:17:55.211734: step: 224/77, loss: 0.012494869530200958 2023-01-22 08:17:56.487917: step: 228/77, loss: 0.014958437532186508 2023-01-22 08:17:57.751975: step: 232/77, loss: 0.0002445927239023149 2023-01-22 08:17:59.082231: step: 236/77, loss: 0.00011449036537669599 2023-01-22 08:18:00.373653: step: 240/77, loss: 0.0002988382475450635 2023-01-22 08:18:01.681537: step: 244/77, loss: 0.005984320305287838 2023-01-22 08:18:02.976415: step: 248/77, loss: 6.18845151620917e-05 2023-01-22 08:18:04.265164: step: 252/77, loss: 0.0006847563199698925 2023-01-22 08:18:05.574048: step: 256/77, loss: 0.004405403509736061 2023-01-22 08:18:06.868318: step: 260/77, loss: 0.00732279010117054 2023-01-22 08:18:08.171865: step: 264/77, loss: 0.05142771080136299 2023-01-22 08:18:09.482013: step: 268/77, loss: 6.224772369023412e-05 2023-01-22 08:18:10.766174: step: 272/77, loss: 0.0019245930016040802 2023-01-22 08:18:12.058491: step: 276/77, loss: 0.0025355899706482887 2023-01-22 08:18:13.404195: step: 280/77, loss: 0.006086964160203934 2023-01-22 08:18:14.731300: step: 284/77, loss: 0.0012631918070837855 2023-01-22 08:18:16.037903: step: 288/77, loss: 0.0001228465116582811 2023-01-22 08:18:17.359077: step: 292/77, loss: 0.00019296916434541345 2023-01-22 08:18:18.659245: step: 296/77, loss: 0.002572681289166212 2023-01-22 08:18:19.962165: step: 300/77, loss: 1.442090706404997e-05 2023-01-22 08:18:21.265549: step: 304/77, loss: 3.7758538837806555e-06 2023-01-22 08:18:22.552611: step: 308/77, loss: 0.019748851656913757 2023-01-22 08:18:23.804888: step: 312/77, loss: 0.00014960896805860102 2023-01-22 08:18:25.118548: step: 316/77, loss: 0.011053039692342281 2023-01-22 08:18:26.424924: step: 320/77, loss: 2.8220272270118585e-06 2023-01-22 08:18:27.684073: step: 324/77, loss: 0.0003843028098344803 2023-01-22 08:18:28.967571: step: 328/77, loss: 0.01987829990684986 2023-01-22 08:18:30.236251: step: 332/77, loss: 0.09208142012357712 2023-01-22 08:18:31.542913: step: 336/77, loss: 0.003218474332243204 2023-01-22 08:18:32.857491: step: 340/77, loss: 0.0072304122149944305 2023-01-22 08:18:34.157186: step: 344/77, loss: 0.0015550671378150582 2023-01-22 08:18:35.443417: step: 348/77, loss: 0.00026606611208990216 2023-01-22 08:18:36.754282: step: 352/77, loss: 2.4905082682380453e-05 2023-01-22 08:18:38.065982: step: 356/77, loss: 0.05696500837802887 2023-01-22 08:18:39.354663: step: 360/77, loss: 4.337245991337113e-05 2023-01-22 08:18:40.613223: step: 364/77, loss: 0.015604798682034016 2023-01-22 
08:18:41.966852: step: 368/77, loss: 4.735090897156624e-06 2023-01-22 08:18:43.254773: step: 372/77, loss: 5.179361323826015e-06 2023-01-22 08:18:44.593540: step: 376/77, loss: 0.012082105502486229 2023-01-22 08:18:45.895600: step: 380/77, loss: 0.00010182980622630566 2023-01-22 08:18:47.135344: step: 384/77, loss: 8.844018157105893e-05 2023-01-22 08:18:48.400719: step: 388/77, loss: 0.05117665231227875 ================================================== Loss: 0.012 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 15} Test Chinese: {'template': {'p': 0.9324324324324325, 'r': 0.5433070866141733, 'f1': 0.6865671641791046}, 'slot': {'p': 0.5675675675675675, 'r': 0.018166089965397925, 'f1': 0.03520536462699078}, 'combined': 0.024170847355844422, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 15} Test Korean: {'template': {'p': 0.918918918918919, 'r': 0.5354330708661418, 'f1': 0.6766169154228856}, 'slot': {'p': 0.5405405405405406, 'r': 0.01730103806228374, 'f1': 0.03352891869237217}, 'combined': 0.022686233543097588, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 15} Test Russian: {'template': {'p': 0.9315068493150684, 'r': 0.5354330708661418, 'f1': 0.6799999999999999}, 'slot': {'p': 0.5675675675675675, 'r': 0.018166089965397925, 'f1': 0.03520536462699078}, 'combined': 0.02393964794635373, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 
0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 16 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:20:31.694139: step: 4/77, loss: 4.861850356974173e-06 2023-01-22 08:20:32.978255: step: 8/77, loss: 1.5866782632656395e-05 2023-01-22 08:20:34.302831: step: 12/77, loss: 0.00033481360878795385 2023-01-22 08:20:35.617858: step: 16/77, loss: 7.987874596437905e-06 2023-01-22 08:20:36.875411: step: 20/77, loss: 0.0006085088243708014 2023-01-22 08:20:38.162495: step: 24/77, loss: 0.0014214256079867482 2023-01-22 08:20:39.426140: step: 28/77, loss: 0.033199504017829895 2023-01-22 08:20:40.712537: step: 32/77, loss: 0.002857948187738657 2023-01-22 08:20:42.010690: step: 36/77, loss: 0.002551297191530466 2023-01-22 08:20:43.272182: step: 40/77, loss: 0.0009598369360901415 2023-01-22 08:20:44.575361: step: 44/77, loss: 3.17394039939245e-07 2023-01-22 08:20:45.811194: step: 48/77, loss: 0.00013348981156013906 2023-01-22 08:20:47.143058: step: 52/77, loss: 0.00243179639801383 2023-01-22 08:20:48.407948: step: 56/77, loss: 0.00048356899060308933 2023-01-22 08:20:49.711649: step: 60/77, loss: 0.036274898797273636 2023-01-22 08:20:51.018435: step: 64/77, loss: 0.002666803542524576 2023-01-22 08:20:52.311361: step: 68/77, loss: 0.0021867984905838966 2023-01-22 08:20:53.588527: step: 72/77, loss: 0.055631570518016815 2023-01-22 08:20:54.894580: step: 76/77, loss: 0.04630818963050842 2023-01-22 08:20:56.161020: step: 80/77, loss: 0.0002477295638527721 2023-01-22 08:20:57.447675: step: 84/77, loss: 0.023699330165982246 2023-01-22 08:20:58.729577: step: 88/77, loss: 0.0026453514583408833 2023-01-22 08:21:00.027234: step: 92/77, loss: 0.014910968951880932 2023-01-22 08:21:01.292729: step: 96/77, loss: 0.0005018580122850835 2023-01-22 08:21:02.550688: step: 100/77, loss: 0.0011478365631774068 2023-01-22 08:21:03.851200: step: 104/77, loss: 0.0002814961189869791 2023-01-22 08:21:05.084568: step: 108/77, loss: 0.002626886125653982 2023-01-22 08:21:06.350993: step: 112/77, loss: 5.4958374676061794e-05 2023-01-22 08:21:07.657291: step: 116/77, loss: 4.4051979784853756e-05 2023-01-22 08:21:08.958510: step: 120/77, loss: 0.017519483342766762 2023-01-22 08:21:10.265548: step: 124/77, loss: 0.003170792944729328 2023-01-22 08:21:11.536108: step: 128/77, loss: 0.0010376194259151816 2023-01-22 08:21:12.848794: step: 132/77, loss: 1.1920615179406013e-06 2023-01-22 08:21:14.149136: step: 136/77, loss: 0.00045014932402409613 2023-01-22 08:21:15.461195: step: 140/77, loss: 0.0001678488333709538 2023-01-22 08:21:16.758095: 
step: 144/77, loss: 0.006620452739298344 2023-01-22 08:21:18.056687: step: 148/77, loss: 3.382553757091955e-07 2023-01-22 08:21:19.361888: step: 152/77, loss: 0.0026630829088389874 2023-01-22 08:21:20.726567: step: 156/77, loss: 2.1847074094694108e-05 2023-01-22 08:21:22.029328: step: 160/77, loss: 0.023481661453843117 2023-01-22 08:21:23.306770: step: 164/77, loss: 0.001555607421323657 2023-01-22 08:21:24.573545: step: 168/77, loss: 0.007830877788364887 2023-01-22 08:21:25.834926: step: 172/77, loss: 0.0030038796830922365 2023-01-22 08:21:27.085105: step: 176/77, loss: 0.00042911790660582483 2023-01-22 08:21:28.357851: step: 180/77, loss: 0.008290370926260948 2023-01-22 08:21:29.693276: step: 184/77, loss: 0.016199413686990738 2023-01-22 08:21:30.999077: step: 188/77, loss: 0.0002577801060397178 2023-01-22 08:21:32.319312: step: 192/77, loss: 0.00019381032325327396 2023-01-22 08:21:33.639813: step: 196/77, loss: 0.00016714620869606733 2023-01-22 08:21:34.921515: step: 200/77, loss: 0.00012267788406461477 2023-01-22 08:21:36.213700: step: 204/77, loss: 0.009114248678088188 2023-01-22 08:21:37.501154: step: 208/77, loss: 0.03911638632416725 2023-01-22 08:21:38.766709: step: 212/77, loss: 1.8671898942557164e-05 2023-01-22 08:21:40.018513: step: 216/77, loss: 0.001954711740836501 2023-01-22 08:21:41.343841: step: 220/77, loss: 0.007040996104478836 2023-01-22 08:21:42.640229: step: 224/77, loss: 1.5829691619728692e-05 2023-01-22 08:21:43.927622: step: 228/77, loss: 0.0002243506460217759 2023-01-22 08:21:45.202116: step: 232/77, loss: 0.00683977035805583 2023-01-22 08:21:46.501798: step: 236/77, loss: 2.2761918444302864e-05 2023-01-22 08:21:47.847097: step: 240/77, loss: 0.019565310329198837 2023-01-22 08:21:49.189192: step: 244/77, loss: 9.414236956217792e-06 2023-01-22 08:21:50.501720: step: 248/77, loss: 1.728589450067375e-05 2023-01-22 08:21:51.786709: step: 252/77, loss: 0.0035243923775851727 2023-01-22 08:21:53.059604: step: 256/77, loss: 0.0015222537331283092 2023-01-22 08:21:54.353674: step: 260/77, loss: 4.505701144807972e-05 2023-01-22 08:21:55.639007: step: 264/77, loss: 0.0015098822768777609 2023-01-22 08:21:56.953179: step: 268/77, loss: 0.01089445035904646 2023-01-22 08:21:58.266053: step: 272/77, loss: 0.03385207802057266 2023-01-22 08:21:59.559223: step: 276/77, loss: 0.00025587028358131647 2023-01-22 08:22:00.863179: step: 280/77, loss: 3.722103065229021e-05 2023-01-22 08:22:02.189693: step: 284/77, loss: 0.002465657889842987 2023-01-22 08:22:03.455696: step: 288/77, loss: 0.01795008033514023 2023-01-22 08:22:04.796900: step: 292/77, loss: 1.3884822692489251e-05 2023-01-22 08:22:06.112620: step: 296/77, loss: 0.007718597073107958 2023-01-22 08:22:07.390177: step: 300/77, loss: 0.028821276500821114 2023-01-22 08:22:08.705605: step: 304/77, loss: 0.06679520010948181 2023-01-22 08:22:10.029192: step: 308/77, loss: 0.016347669064998627 2023-01-22 08:22:11.295029: step: 312/77, loss: 5.412711834651418e-05 2023-01-22 08:22:12.574738: step: 316/77, loss: 0.009288772009313107 2023-01-22 08:22:13.914158: step: 320/77, loss: 0.00021472680964507163 2023-01-22 08:22:15.259611: step: 324/77, loss: 8.622468885732815e-05 2023-01-22 08:22:16.582836: step: 328/77, loss: 0.012828285805881023 2023-01-22 08:22:17.961792: step: 332/77, loss: 0.013079500757157803 2023-01-22 08:22:19.271348: step: 336/77, loss: 0.00018611534324008971 2023-01-22 08:22:20.555413: step: 340/77, loss: 0.00037904319469816983 2023-01-22 08:22:21.839407: step: 344/77, loss: 0.00024278687487822026 2023-01-22 08:22:23.078183: 
step: 348/77, loss: 0.03581083565950394 2023-01-22 08:22:24.379908: step: 352/77, loss: 1.897270885820035e-05 2023-01-22 08:22:25.702934: step: 356/77, loss: 0.00048672978300601244 2023-01-22 08:22:26.992061: step: 360/77, loss: 0.0008217963622882962 2023-01-22 08:22:28.280611: step: 364/77, loss: 0.0005745338276028633 2023-01-22 08:22:29.553770: step: 368/77, loss: 0.002440313808619976 2023-01-22 08:22:30.888878: step: 372/77, loss: 0.0001457735343137756 2023-01-22 08:22:32.199063: step: 376/77, loss: 0.033896248787641525 2023-01-22 08:22:33.502452: step: 380/77, loss: 0.016800181940197945 2023-01-22 08:22:34.858097: step: 384/77, loss: 0.02510041743516922 2023-01-22 08:22:36.197859: step: 388/77, loss: 0.036930397152900696 ================================================== Loss: 0.008 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Chinese: {'template': {'p': 0.9358974358974359, 'r': 0.5748031496062992, 'f1': 0.7121951219512195}, 'slot': {'p': 0.6060606060606061, 'r': 0.01730103806228374, 'f1': 0.0336417157275021}, 'combined': 0.02395946583519662, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Korean: {'template': {'p': 0.9240506329113924, 'r': 0.5748031496062992, 'f1': 0.7087378640776699}, 'slot': {'p': 0.6176470588235294, 'r': 0.018166089965397925, 'f1': 0.03529411764705882}, 'combined': 0.025014277555682467, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Russian: {'template': {'p': 0.9358974358974359, 'r': 0.5748031496062992, 'f1': 0.7121951219512195}, 'slot': {'p': 0.6363636363636364, 'r': 0.018166089965397925, 'f1': 0.03532380151387721}, 'combined': 0.02515743912695645, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 
0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 17 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:24:19.540342: step: 4/77, loss: 0.000641880847979337 2023-01-22 08:24:20.874113: step: 8/77, loss: 0.002244143048301339 2023-01-22 08:24:22.207860: step: 12/77, loss: 0.021898828446865082 2023-01-22 08:24:23.492733: step: 16/77, loss: 0.0001363503688480705 2023-01-22 08:24:24.756957: step: 20/77, loss: 8.214037370635197e-05 2023-01-22 08:24:26.062235: step: 24/77, loss: 1.8887612895923667e-05 2023-01-22 08:24:27.358806: step: 28/77, loss: 0.0013867117231711745 2023-01-22 08:24:28.710120: step: 32/77, loss: 0.0017078772652894258 2023-01-22 08:24:30.002005: step: 36/77, loss: 3.950330938096158e-05 2023-01-22 08:24:31.305697: step: 40/77, loss: 0.0019030816620215774 2023-01-22 08:24:32.624778: step: 44/77, loss: 8.705825894139707e-05 2023-01-22 08:24:33.931868: step: 48/77, loss: 0.00028506916714832187 2023-01-22 08:24:35.227810: step: 52/77, loss: 0.005858226679265499 2023-01-22 08:24:36.499874: step: 56/77, loss: 0.0007443473441526294 2023-01-22 08:24:37.785847: step: 60/77, loss: 0.00011790436110459268 2023-01-22 08:24:39.054526: step: 64/77, loss: 1.5535473721683957e-05 2023-01-22 08:24:40.354613: step: 68/77, loss: 0.0008613172685727477 2023-01-22 08:24:41.676475: step: 72/77, loss: 0.005559463519603014 2023-01-22 08:24:42.973374: step: 76/77, loss: 0.04706824570894241 2023-01-22 08:24:44.284099: step: 80/77, loss: 6.35113101452589e-05 2023-01-22 08:24:45.546192: step: 84/77, loss: 0.00010967568960040808 2023-01-22 08:24:46.859106: step: 88/77, loss: 0.0002559815184213221 2023-01-22 08:24:48.152746: step: 92/77, loss: 0.031311001628637314 2023-01-22 08:24:49.430117: step: 96/77, loss: 0.055141374468803406 2023-01-22 08:24:50.722557: step: 100/77, loss: 0.016477080062031746 2023-01-22 08:24:52.028635: step: 104/77, loss: 0.0002590902440715581 2023-01-22 08:24:53.337222: step: 108/77, loss: 6.216138717718422e-05 2023-01-22 08:24:54.653880: step: 112/77, loss: 0.0004739709838759154 2023-01-22 08:24:55.950145: step: 116/77, loss: 0.008552845567464828 2023-01-22 08:24:57.231862: step: 120/77, loss: 0.001149423187598586 2023-01-22 08:24:58.545379: step: 124/77, loss: 0.01125161163508892 2023-01-22 08:24:59.837236: step: 128/77, loss: 0.00855990033596754 2023-01-22 08:25:01.069468: step: 132/77, loss: 
0.005925518926233053 2023-01-22 08:25:02.343384: step: 136/77, loss: 0.0032444128300994635 2023-01-22 08:25:03.652403: step: 140/77, loss: 0.0002631635288707912 2023-01-22 08:25:04.919811: step: 144/77, loss: 0.07887933403253555 2023-01-22 08:25:06.205714: step: 148/77, loss: 0.0003720789682120085 2023-01-22 08:25:07.470345: step: 152/77, loss: 0.01905059814453125 2023-01-22 08:25:08.807274: step: 156/77, loss: 0.008945231325924397 2023-01-22 08:25:10.112694: step: 160/77, loss: 0.0002890854375436902 2023-01-22 08:25:11.456649: step: 164/77, loss: 0.019471365958452225 2023-01-22 08:25:12.770763: step: 168/77, loss: 0.0015825422015041113 2023-01-22 08:25:14.058947: step: 172/77, loss: 0.008039366453886032 2023-01-22 08:25:15.333979: step: 176/77, loss: 0.0008831970044411719 2023-01-22 08:25:16.599070: step: 180/77, loss: 0.014655493199825287 2023-01-22 08:25:17.891932: step: 184/77, loss: 0.04497211426496506 2023-01-22 08:25:19.165737: step: 188/77, loss: 0.002673403127118945 2023-01-22 08:25:20.483296: step: 192/77, loss: 0.0011580471182242036 2023-01-22 08:25:21.776072: step: 196/77, loss: 1.5838615581742488e-05 2023-01-22 08:25:23.061929: step: 200/77, loss: 4.164213896729052e-05 2023-01-22 08:25:24.361543: step: 204/77, loss: 1.3706779100175481e-05 2023-01-22 08:25:25.672580: step: 208/77, loss: 9.41771941143088e-06 2023-01-22 08:25:26.973286: step: 212/77, loss: 0.00037044179043732584 2023-01-22 08:25:28.234009: step: 216/77, loss: 0.0010035919258370996 2023-01-22 08:25:29.502093: step: 220/77, loss: 2.4068180209724233e-05 2023-01-22 08:25:30.806356: step: 224/77, loss: 0.0002719534677453339 2023-01-22 08:25:32.089491: step: 228/77, loss: 0.0005661757895722985 2023-01-22 08:25:33.367072: step: 232/77, loss: 7.066810212563723e-05 2023-01-22 08:25:34.686902: step: 236/77, loss: 0.00011910132161574438 2023-01-22 08:25:36.045208: step: 240/77, loss: 0.028321973979473114 2023-01-22 08:25:37.346197: step: 244/77, loss: 0.0003590689739212394 2023-01-22 08:25:38.652925: step: 248/77, loss: 3.452347664278932e-05 2023-01-22 08:25:39.966716: step: 252/77, loss: 0.0032013666350394487 2023-01-22 08:25:41.193814: step: 256/77, loss: 9.446490730624646e-05 2023-01-22 08:25:42.497550: step: 260/77, loss: 0.028026148676872253 2023-01-22 08:25:43.745554: step: 264/77, loss: 6.273339749895968e-07 2023-01-22 08:25:45.077357: step: 268/77, loss: 0.002552525606006384 2023-01-22 08:25:46.401381: step: 272/77, loss: 0.00981560256332159 2023-01-22 08:25:47.697771: step: 276/77, loss: 3.770161856664345e-05 2023-01-22 08:25:48.949015: step: 280/77, loss: 0.0011477674124762416 2023-01-22 08:25:50.254275: step: 284/77, loss: 0.027259770780801773 2023-01-22 08:25:51.576134: step: 288/77, loss: 0.03577423840761185 2023-01-22 08:25:52.897539: step: 292/77, loss: 0.12435278296470642 2023-01-22 08:25:54.175519: step: 296/77, loss: 5.416186922957422e-06 2023-01-22 08:25:55.502500: step: 300/77, loss: 1.01263740361901e-05 2023-01-22 08:25:56.791808: step: 304/77, loss: 0.026366937905550003 2023-01-22 08:25:58.080581: step: 308/77, loss: 0.0008996648248285055 2023-01-22 08:25:59.375217: step: 312/77, loss: 4.338581675256137e-06 2023-01-22 08:26:00.698945: step: 316/77, loss: 0.00284166494384408 2023-01-22 08:26:02.002506: step: 320/77, loss: 0.0002525055897422135 2023-01-22 08:26:03.257768: step: 324/77, loss: 0.00012097764556529 2023-01-22 08:26:04.541421: step: 328/77, loss: 0.02872185781598091 2023-01-22 08:26:05.784618: step: 332/77, loss: 3.208328416803852e-05 2023-01-22 08:26:07.084905: step: 336/77, loss: 
0.019269438460469246 2023-01-22 08:26:08.387564: step: 340/77, loss: 0.0260951928794384 2023-01-22 08:26:09.642842: step: 344/77, loss: 0.004579652100801468 2023-01-22 08:26:10.963803: step: 348/77, loss: 0.0003985298390034586 2023-01-22 08:26:12.315948: step: 352/77, loss: 0.009386884048581123 2023-01-22 08:26:13.651898: step: 356/77, loss: 2.577963641670067e-05 2023-01-22 08:26:14.943290: step: 360/77, loss: 0.00037908164085820317 2023-01-22 08:26:16.236202: step: 364/77, loss: 0.0006229857681319118 2023-01-22 08:26:17.567902: step: 368/77, loss: 0.0008804745739325881 2023-01-22 08:26:18.909118: step: 372/77, loss: 0.005055315792560577 2023-01-22 08:26:20.201025: step: 376/77, loss: 0.00023594040249008685 2023-01-22 08:26:21.487795: step: 380/77, loss: 5.790413706563413e-05 2023-01-22 08:26:22.767030: step: 384/77, loss: 0.02676387131214142 2023-01-22 08:26:24.041916: step: 388/77, loss: 9.52510345086921e-06 ================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Chinese: {'template': {'p': 0.9466666666666667, 'r': 0.5590551181102362, 'f1': 0.7029702970297029}, 'slot': {'p': 0.5869565217391305, 'r': 0.023356401384083045, 'f1': 0.04492512479201331}, 'combined': 0.03158102831913807, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Korean: {'template': {'p': 0.9333333333333333, 'r': 0.5511811023622047, 'f1': 0.693069306930693}, 'slot': {'p': 0.5652173913043478, 'r': 0.02249134948096886, 'f1': 0.04326123128119801}, 'combined': 0.02998303158102832, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Russian: {'template': {'p': 0.935064935064935, 'r': 0.5669291338582677, 'f1': 0.7058823529411765}, 'slot': {'p': 0.574468085106383, 'r': 0.023356401384083045, 'f1': 0.044887780548628436}, 'combined': 0.03168549215197301, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 
0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 18 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:28:07.268527: step: 4/77, loss: 0.0002562256995588541 2023-01-22 08:28:08.563457: step: 8/77, loss: 0.005105638410896063 2023-01-22 08:28:09.820284: step: 12/77, loss: 0.021551361307501793 2023-01-22 08:28:11.101768: step: 16/77, loss: 0.0009725134586915374 2023-01-22 08:28:12.374119: step: 20/77, loss: 0.00022831915703136474 2023-01-22 08:28:13.717041: step: 24/77, loss: 0.0030773894395679235 2023-01-22 08:28:14.999157: step: 28/77, loss: 0.014899294823408127 2023-01-22 08:28:16.286702: step: 32/77, loss: 0.00023760151816532016 2023-01-22 08:28:17.590344: step: 36/77, loss: 0.001946118427440524 2023-01-22 08:28:18.909696: step: 40/77, loss: 0.03807735815644264 2023-01-22 08:28:20.201902: step: 44/77, loss: 0.0009200061904266477 2023-01-22 08:28:21.482065: step: 48/77, loss: 1.4559274859493598e-05 2023-01-22 08:28:22.753933: step: 52/77, loss: 0.03053630143404007 2023-01-22 08:28:24.038186: step: 56/77, loss: 0.013728977181017399 2023-01-22 08:28:25.357461: step: 60/77, loss: 9.248963033314794e-05 2023-01-22 08:28:26.630707: step: 64/77, loss: 0.00022051921405363828 2023-01-22 08:28:27.933370: step: 68/77, loss: 0.00015630066627636552 2023-01-22 08:28:29.202565: step: 72/77, loss: 0.005821420811116695 2023-01-22 08:28:30.488427: step: 76/77, loss: 0.0047253817319869995 2023-01-22 08:28:31.794768: step: 80/77, loss: 0.0034232642501592636 2023-01-22 08:28:33.090695: step: 84/77, loss: 0.010041794739663601 2023-01-22 08:28:34.369931: step: 88/77, loss: 0.001764630083926022 2023-01-22 08:28:35.659034: step: 92/77, loss: 0.0006076145800761878 2023-01-22 08:28:36.980810: step: 96/77, loss: 0.00011601823644014075 2023-01-22 08:28:38.261489: step: 100/77, loss: 0.019558578729629517 2023-01-22 08:28:39.605190: step: 104/77, loss: 0.011507065035402775 2023-01-22 08:28:40.881977: step: 108/77, loss: 7.664941222174093e-06 2023-01-22 08:28:42.163550: step: 112/77, loss: 0.0002507162862457335 2023-01-22 08:28:43.449603: step: 116/77, loss: 
0.006137733347713947 2023-01-22 08:28:44.688894: step: 120/77, loss: 0.032717470079660416 2023-01-22 08:28:45.951916: step: 124/77, loss: 0.01983604021370411 2023-01-22 08:28:47.270974: step: 128/77, loss: 4.210448241792619e-05 2023-01-22 08:28:48.568969: step: 132/77, loss: 0.011637914925813675 2023-01-22 08:28:49.874134: step: 136/77, loss: 1.722968227113597e-05 2023-01-22 08:28:51.197717: step: 140/77, loss: 1.2233510915393708e-06 2023-01-22 08:28:52.496227: step: 144/77, loss: 0.002384501276537776 2023-01-22 08:28:53.766982: step: 148/77, loss: 0.00037618394708260894 2023-01-22 08:28:55.018180: step: 152/77, loss: 0.0022240763064473867 2023-01-22 08:28:56.280314: step: 156/77, loss: 0.003713844809681177 2023-01-22 08:28:57.611674: step: 160/77, loss: 0.008770265616476536 2023-01-22 08:28:58.941724: step: 164/77, loss: 0.0006316181388683617 2023-01-22 08:29:00.236886: step: 168/77, loss: 3.712902980623767e-05 2023-01-22 08:29:01.573611: step: 172/77, loss: 0.05797393247485161 2023-01-22 08:29:02.859767: step: 176/77, loss: 8.296656596940011e-05 2023-01-22 08:29:04.164848: step: 180/77, loss: 0.007537681609392166 2023-01-22 08:29:05.419497: step: 184/77, loss: 0.023705052211880684 2023-01-22 08:29:06.682497: step: 188/77, loss: 2.0122100977459922e-05 2023-01-22 08:29:07.961615: step: 192/77, loss: 0.00016706100723240525 2023-01-22 08:29:09.252623: step: 196/77, loss: 0.00039428245509043336 2023-01-22 08:29:10.525182: step: 200/77, loss: 0.0008982608560472727 2023-01-22 08:29:11.883697: step: 204/77, loss: 0.007041514851152897 2023-01-22 08:29:13.156842: step: 208/77, loss: 0.00011690274550346658 2023-01-22 08:29:14.444059: step: 212/77, loss: 0.0006392638315446675 2023-01-22 08:29:15.771566: step: 216/77, loss: 0.003344218945130706 2023-01-22 08:29:17.123016: step: 220/77, loss: 0.0005616850685328245 2023-01-22 08:29:18.428265: step: 224/77, loss: 0.00034996814792975783 2023-01-22 08:29:19.745371: step: 228/77, loss: 3.20040180668002e-06 2023-01-22 08:29:21.044859: step: 232/77, loss: 0.005285785999149084 2023-01-22 08:29:22.342692: step: 236/77, loss: 0.0009092001710087061 2023-01-22 08:29:23.628272: step: 240/77, loss: 0.00760983070358634 2023-01-22 08:29:24.904660: step: 244/77, loss: 0.008531717583537102 2023-01-22 08:29:26.212727: step: 248/77, loss: 0.0012734340270981193 2023-01-22 08:29:27.527841: step: 252/77, loss: 0.0020677277352660894 2023-01-22 08:29:28.806080: step: 256/77, loss: 1.4285917131928727e-05 2023-01-22 08:29:30.116989: step: 260/77, loss: 3.830181958619505e-05 2023-01-22 08:29:31.414025: step: 264/77, loss: 0.007554773241281509 2023-01-22 08:29:32.730565: step: 268/77, loss: 1.0465077139087953e-05 2023-01-22 08:29:34.016533: step: 272/77, loss: 0.0043717920780181885 2023-01-22 08:29:35.316101: step: 276/77, loss: 0.006848793011158705 2023-01-22 08:29:36.643338: step: 280/77, loss: 0.011280843056738377 2023-01-22 08:29:37.931674: step: 284/77, loss: 0.000846183393150568 2023-01-22 08:29:39.210028: step: 288/77, loss: 0.03682919219136238 2023-01-22 08:29:40.523225: step: 292/77, loss: 0.02590131014585495 2023-01-22 08:29:41.837095: step: 296/77, loss: 0.0019165530102327466 2023-01-22 08:29:43.188648: step: 300/77, loss: 0.0008535957313142717 2023-01-22 08:29:44.478873: step: 304/77, loss: 0.0002510050544515252 2023-01-22 08:29:45.787119: step: 308/77, loss: 7.329511572606862e-05 2023-01-22 08:29:47.055344: step: 312/77, loss: 0.01697651669383049 2023-01-22 08:29:48.337998: step: 316/77, loss: 5.466431957756868e-06 2023-01-22 08:29:49.632263: step: 320/77, loss: 
4.962068373970396e-07 2023-01-22 08:29:50.925976: step: 324/77, loss: 3.868531348416582e-05 2023-01-22 08:29:52.259353: step: 328/77, loss: 0.0004085543332621455 2023-01-22 08:29:53.522333: step: 332/77, loss: 4.1887658881023526e-05 2023-01-22 08:29:54.871410: step: 336/77, loss: 0.0001989584561670199 2023-01-22 08:29:56.191352: step: 340/77, loss: 4.200351668259827e-06 2023-01-22 08:29:57.539927: step: 344/77, loss: 0.0006397374672815204 2023-01-22 08:29:58.826917: step: 348/77, loss: 0.013820907101035118 2023-01-22 08:30:00.176976: step: 352/77, loss: 0.009886541403830051 2023-01-22 08:30:01.498067: step: 356/77, loss: 5.0389326133881696e-06 2023-01-22 08:30:02.816967: step: 360/77, loss: 1.8894158984039677e-06 2023-01-22 08:30:04.081906: step: 364/77, loss: 0.004064930137246847 2023-01-22 08:30:05.403263: step: 368/77, loss: 0.00628216378390789 2023-01-22 08:30:06.704690: step: 372/77, loss: 7.403906784020364e-05 2023-01-22 08:30:08.000945: step: 376/77, loss: 9.216591570293531e-05 2023-01-22 08:30:09.261776: step: 380/77, loss: 0.002427300438284874 2023-01-22 08:30:10.609508: step: 384/77, loss: 0.019096795469522476 2023-01-22 08:30:11.894074: step: 388/77, loss: 2.604655037430348e-06 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Chinese: {'template': {'p': 0.9333333333333333, 'r': 0.5511811023622047, 'f1': 0.693069306930693}, 'slot': {'p': 0.5952380952380952, 'r': 0.02162629757785467, 'f1': 0.041736227045075125}, 'combined': 0.028926097952032263, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Korean: {'template': {'p': 0.9333333333333333, 'r': 0.5511811023622047, 'f1': 0.693069306930693}, 'slot': {'p': 0.5813953488372093, 'r': 0.02162629757785467, 'f1': 0.041701417848206836}, 'combined': 0.028901972766083944, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Russian: {'template': {'p': 0.9333333333333333, 'r': 0.5511811023622047, 'f1': 0.693069306930693}, 'slot': {'p': 0.5813953488372093, 'r': 0.02162629757785467, 'f1': 0.041701417848206836}, 'combined': 0.028901972766083944, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 18} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 
0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 19 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:31:55.390033: step: 4/77, loss: 4.772192187374458e-05 2023-01-22 08:31:56.681498: step: 8/77, loss: 0.015655361115932465 2023-01-22 08:31:57.991480: step: 12/77, loss: 0.00045056085218675435 2023-01-22 08:31:59.308719: step: 16/77, loss: 4.759726289194077e-05 2023-01-22 08:32:00.605706: step: 20/77, loss: 0.021239858120679855 2023-01-22 08:32:01.881955: step: 24/77, loss: 0.02561485767364502 2023-01-22 08:32:03.161288: step: 28/77, loss: 0.000703348487149924 2023-01-22 08:32:04.458253: step: 32/77, loss: 0.0007722416776232421 2023-01-22 08:32:05.758791: step: 36/77, loss: 2.419845804979559e-05 2023-01-22 08:32:07.033816: step: 40/77, loss: 0.026117466390132904 2023-01-22 08:32:08.373498: step: 44/77, loss: 4.887551767751575e-07 2023-01-22 08:32:09.706663: step: 48/77, loss: 0.019306404516100883 2023-01-22 08:32:10.955708: step: 52/77, loss: 0.0004252229700796306 2023-01-22 08:32:12.225673: step: 56/77, loss: 5.336081812856719e-05 2023-01-22 08:32:13.495497: step: 60/77, loss: 0.00016709948249626905 2023-01-22 08:32:14.764474: step: 64/77, loss: 7.643592653039377e-06 2023-01-22 08:32:16.043133: step: 68/77, loss: 0.011480233632028103 2023-01-22 08:32:17.345748: step: 72/77, loss: 0.019186750054359436 2023-01-22 08:32:18.624596: step: 76/77, loss: 0.005349922925233841 2023-01-22 08:32:19.930992: step: 80/77, loss: 0.11054429411888123 2023-01-22 08:32:21.224737: step: 84/77, loss: 0.0074567473493516445 2023-01-22 08:32:22.496466: step: 88/77, loss: 0.00015486495976801962 2023-01-22 08:32:23.781829: step: 92/77, loss: 0.00017922437109518796 2023-01-22 08:32:25.091831: step: 96/77, loss: 0.000948175962548703 2023-01-22 08:32:26.373584: step: 100/77, loss: 
0.014781606383621693 2023-01-22 08:32:27.668865: step: 104/77, loss: 0.0035185841843485832 2023-01-22 08:32:28.983596: step: 108/77, loss: 0.016932154074311256 2023-01-22 08:32:30.317855: step: 112/77, loss: 0.0009530320530757308 2023-01-22 08:32:31.650273: step: 116/77, loss: 0.056651294231414795 2023-01-22 08:32:32.962645: step: 120/77, loss: 0.0004563061229418963 2023-01-22 08:32:34.241928: step: 124/77, loss: 2.2642556359642185e-05 2023-01-22 08:32:35.548971: step: 128/77, loss: 0.0009033044916577637 2023-01-22 08:32:36.791211: step: 132/77, loss: 0.0007692720391787589 2023-01-22 08:32:38.081148: step: 136/77, loss: 6.139226798040909e-07 2023-01-22 08:32:39.318995: step: 140/77, loss: 0.01133093424141407 2023-01-22 08:32:40.612784: step: 144/77, loss: 0.0002770505379885435 2023-01-22 08:32:41.919877: step: 148/77, loss: 0.021317068487405777 2023-01-22 08:32:43.187909: step: 152/77, loss: 0.00023177666298579425 2023-01-22 08:32:44.399521: step: 156/77, loss: 0.004699833691120148 2023-01-22 08:32:45.640805: step: 160/77, loss: 5.041718759457581e-05 2023-01-22 08:32:46.907918: step: 164/77, loss: 0.11572644114494324 2023-01-22 08:32:48.215354: step: 168/77, loss: 0.00011996572720818222 2023-01-22 08:32:49.518388: step: 172/77, loss: 0.0028565579559653997 2023-01-22 08:32:50.780984: step: 176/77, loss: 1.9968625565525144e-05 2023-01-22 08:32:52.075134: step: 180/77, loss: 0.0027047202456742525 2023-01-22 08:32:53.414593: step: 184/77, loss: 0.00011742675269488245 2023-01-22 08:32:54.705385: step: 188/77, loss: 2.1647458197548985e-05 2023-01-22 08:32:56.015396: step: 192/77, loss: 0.011670035310089588 2023-01-22 08:32:57.319350: step: 196/77, loss: 4.1424587493565923e-07 2023-01-22 08:32:58.623740: step: 200/77, loss: 0.00022093798907008022 2023-01-22 08:32:59.969858: step: 204/77, loss: 0.004982348531484604 2023-01-22 08:33:01.236128: step: 208/77, loss: 0.0030651569832116365 2023-01-22 08:33:02.540442: step: 212/77, loss: 0.012024176307022572 2023-01-22 08:33:03.846695: step: 216/77, loss: 0.009974350221455097 2023-01-22 08:33:05.136252: step: 220/77, loss: 5.705433795810677e-05 2023-01-22 08:33:06.440020: step: 224/77, loss: 0.010892936028540134 2023-01-22 08:33:07.787905: step: 228/77, loss: 6.651450348726939e-06 2023-01-22 08:33:09.084109: step: 232/77, loss: 0.002441111486405134 2023-01-22 08:33:10.369625: step: 236/77, loss: 2.452632998029003e-06 2023-01-22 08:33:11.638677: step: 240/77, loss: 0.00028365125763230026 2023-01-22 08:33:12.913306: step: 244/77, loss: 0.000377084594219923 2023-01-22 08:33:14.193074: step: 248/77, loss: 1.0982014373439597e-06 2023-01-22 08:33:15.514728: step: 252/77, loss: 0.006289721466600895 2023-01-22 08:33:16.803709: step: 256/77, loss: 0.000724776997230947 2023-01-22 08:33:18.133558: step: 260/77, loss: 0.0017441506497561932 2023-01-22 08:33:19.388661: step: 264/77, loss: 0.0012942147441208363 2023-01-22 08:33:20.723784: step: 268/77, loss: 0.0005732322460971773 2023-01-22 08:33:22.033796: step: 272/77, loss: 0.005865746643394232 2023-01-22 08:33:23.351568: step: 276/77, loss: 0.017637211829423904 2023-01-22 08:33:24.629395: step: 280/77, loss: 0.0005447964067570865 2023-01-22 08:33:25.902034: step: 284/77, loss: 3.735323844011873e-05 2023-01-22 08:33:27.241514: step: 288/77, loss: 0.003682750044390559 2023-01-22 08:33:28.561334: step: 292/77, loss: 0.07408548891544342 2023-01-22 08:33:29.848033: step: 296/77, loss: 1.5228745724016335e-05 2023-01-22 08:33:31.182456: step: 300/77, loss: 0.006958605255931616 2023-01-22 08:33:32.483099: step: 304/77, 
loss: 0.042658884078264236 2023-01-22 08:33:33.780328: step: 308/77, loss: 0.005706743337213993 2023-01-22 08:33:35.106003: step: 312/77, loss: 0.00036678268224932253 2023-01-22 08:33:36.424410: step: 316/77, loss: 6.538533489219844e-05 2023-01-22 08:33:37.717305: step: 320/77, loss: 0.0004068778653163463 2023-01-22 08:33:38.997424: step: 324/77, loss: 1.0171790563617833e-05 2023-01-22 08:33:40.309528: step: 328/77, loss: 0.0013522073859348893 2023-01-22 08:33:41.594687: step: 332/77, loss: 0.0041106091812253 2023-01-22 08:33:42.898289: step: 336/77, loss: 0.031761832535266876 2023-01-22 08:33:44.205088: step: 340/77, loss: 0.0006097041186876595 2023-01-22 08:33:45.451928: step: 344/77, loss: 0.0002405718551017344 2023-01-22 08:33:46.762241: step: 348/77, loss: 0.005357260815799236 2023-01-22 08:33:48.101925: step: 352/77, loss: 0.015837207436561584 2023-01-22 08:33:49.438983: step: 356/77, loss: 9.958293230738491e-05 2023-01-22 08:33:50.763565: step: 360/77, loss: 0.00025743391597643495 2023-01-22 08:33:52.042457: step: 364/77, loss: 0.010441180318593979 2023-01-22 08:33:53.367617: step: 368/77, loss: 0.0012988889357075095 2023-01-22 08:33:54.666794: step: 372/77, loss: 0.013321981765329838 2023-01-22 08:33:55.952252: step: 376/77, loss: 0.005950694903731346 2023-01-22 08:33:57.298873: step: 380/77, loss: 0.06717066466808319 2023-01-22 08:33:58.611733: step: 384/77, loss: 0.059004560112953186 2023-01-22 08:33:59.882517: step: 388/77, loss: 0.0031717417296022177 ================================================== Loss: 0.010 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 19} Test Chinese: {'template': {'p': 0.9466666666666667, 'r': 0.5590551181102362, 'f1': 0.7029702970297029}, 'slot': {'p': 0.625, 'r': 0.01730103806228374, 'f1': 0.03367003367003367}, 'combined': 0.023669033570023666, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 19} Test Korean: {'template': {'p': 0.9452054794520548, 'r': 0.5433070866141733, 'f1': 0.69}, 'slot': {'p': 0.6060606060606061, 'r': 0.01730103806228374, 'f1': 0.0336417157275021}, 'combined': 0.02321278385197645, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 19} Test Russian: {'template': {'p': 0.9466666666666667, 'r': 0.5590551181102362, 'f1': 0.7029702970297029}, 'slot': {'p': 0.625, 'r': 0.01730103806228374, 'f1': 0.03367003367003367}, 'combined': 0.023669033570023666, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 20 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:35:43.087830: step: 4/77, loss: 0.02381194569170475 2023-01-22 08:35:44.390645: step: 8/77, loss: 0.025099212303757668 2023-01-22 08:35:45.669540: step: 12/77, loss: 1.4690409443574026e-05 2023-01-22 08:35:46.973373: step: 16/77, loss: 0.02800574153661728 2023-01-22 08:35:48.273252: step: 20/77, loss: 6.664691227342701e-06 2023-01-22 08:35:49.561480: step: 24/77, loss: 2.817774839058984e-05 2023-01-22 08:35:50.862832: step: 28/77, loss: 0.0017202789895236492 2023-01-22 08:35:52.168720: step: 32/77, loss: 0.0005560817080549896 2023-01-22 08:35:53.480943: step: 36/77, loss: 0.004274291917681694 2023-01-22 08:35:54.802020: step: 40/77, loss: 0.05250071361660957 2023-01-22 08:35:56.145548: step: 44/77, loss: 0.0007789755472913384 2023-01-22 08:35:57.438615: step: 48/77, loss: 0.009650000371038914 2023-01-22 08:35:58.713980: step: 52/77, loss: 0.0005584878381341696 2023-01-22 08:36:00.024140: step: 56/77, loss: 0.0029105565045028925 2023-01-22 08:36:01.273630: step: 60/77, loss: 0.005994163453578949 2023-01-22 08:36:02.601535: step: 64/77, loss: 0.007980176247656345 2023-01-22 08:36:03.914103: step: 68/77, loss: 1.2488882020988967e-05 2023-01-22 08:36:05.213756: step: 72/77, loss: 0.0029365727677941322 2023-01-22 08:36:06.509048: step: 76/77, loss: 0.000259653344983235 2023-01-22 08:36:07.833121: step: 80/77, loss: 0.06503565609455109 2023-01-22 08:36:09.112156: step: 84/77, loss: 0.009259148500859737 2023-01-22 08:36:10.391491: step: 88/77, loss: 0.000161934774951078 2023-01-22 
08:36:11.687293: step: 92/77, loss: 0.0005904276622459292 2023-01-22 08:36:12.977690: step: 96/77, loss: 0.0002479857357684523 2023-01-22 08:36:14.273029: step: 100/77, loss: 2.837385363818612e-05 2023-01-22 08:36:15.548246: step: 104/77, loss: 0.001441130181774497 2023-01-22 08:36:16.872011: step: 108/77, loss: 0.0002336905017727986 2023-01-22 08:36:18.171624: step: 112/77, loss: 0.02967887371778488 2023-01-22 08:36:19.449470: step: 116/77, loss: 0.0016614971682429314 2023-01-22 08:36:20.778843: step: 120/77, loss: 0.002425632206723094 2023-01-22 08:36:22.079371: step: 124/77, loss: 0.02181481570005417 2023-01-22 08:36:23.373341: step: 128/77, loss: 0.0042419275268912315 2023-01-22 08:36:24.679441: step: 132/77, loss: 0.005345655605196953 2023-01-22 08:36:26.013411: step: 136/77, loss: 0.011341053992509842 2023-01-22 08:36:27.316444: step: 140/77, loss: 0.002245645970106125 2023-01-22 08:36:28.611770: step: 144/77, loss: 0.00017619726713746786 2023-01-22 08:36:29.891169: step: 148/77, loss: 0.014670961536467075 2023-01-22 08:36:31.186875: step: 152/77, loss: 0.00041092970059253275 2023-01-22 08:36:32.477828: step: 156/77, loss: 8.03138391347602e-05 2023-01-22 08:36:33.799126: step: 160/77, loss: 0.003945107106119394 2023-01-22 08:36:35.086444: step: 164/77, loss: 9.81518387561664e-05 2023-01-22 08:36:36.366559: step: 168/77, loss: 0.0017670283559709787 2023-01-22 08:36:37.673963: step: 172/77, loss: 2.3841788276968146e-07 2023-01-22 08:36:38.978100: step: 176/77, loss: 1.1550903764145914e-05 2023-01-22 08:36:40.250136: step: 180/77, loss: 0.0025116358883678913 2023-01-22 08:36:41.577936: step: 184/77, loss: 0.004818919580429792 2023-01-22 08:36:42.883568: step: 188/77, loss: 0.0010180637473240495 2023-01-22 08:36:44.207988: step: 192/77, loss: 0.00011704555799951777 2023-01-22 08:36:45.474156: step: 196/77, loss: 7.31146355974488e-05 2023-01-22 08:36:46.723390: step: 200/77, loss: 0.007681186310946941 2023-01-22 08:36:48.009293: step: 204/77, loss: 0.0030077178962528706 2023-01-22 08:36:49.292774: step: 208/77, loss: 0.021733706817030907 2023-01-22 08:36:50.574453: step: 212/77, loss: 0.005440224893391132 2023-01-22 08:36:51.917486: step: 216/77, loss: 0.007626230828464031 2023-01-22 08:36:53.237354: step: 220/77, loss: 0.0004863716894760728 2023-01-22 08:36:54.555295: step: 224/77, loss: 0.0017873606411740184 2023-01-22 08:36:55.836709: step: 228/77, loss: 7.974127584020607e-06 2023-01-22 08:36:57.143316: step: 232/77, loss: 2.900967956520617e-06 2023-01-22 08:36:58.464059: step: 236/77, loss: 1.7486419892520644e-05 2023-01-22 08:36:59.732201: step: 240/77, loss: 0.041171155869960785 2023-01-22 08:37:01.048007: step: 244/77, loss: 0.0022427060175687075 2023-01-22 08:37:02.341760: step: 248/77, loss: 0.018227651715278625 2023-01-22 08:37:03.604333: step: 252/77, loss: 0.005274713505059481 2023-01-22 08:37:04.886436: step: 256/77, loss: 0.00016559204959776253 2023-01-22 08:37:06.148246: step: 260/77, loss: 0.023844944313168526 2023-01-22 08:37:07.490266: step: 264/77, loss: 0.10402211546897888 2023-01-22 08:37:08.814692: step: 268/77, loss: 1.6917410903261043e-05 2023-01-22 08:37:10.119652: step: 272/77, loss: 6.095885328250006e-05 2023-01-22 08:37:11.400245: step: 276/77, loss: 0.00011749435361707583 2023-01-22 08:37:12.692419: step: 280/77, loss: 1.0508897503314074e-05 2023-01-22 08:37:14.023799: step: 284/77, loss: 0.00043762908899225295 2023-01-22 08:37:15.286533: step: 288/77, loss: 0.00020534142095129937 2023-01-22 08:37:16.639417: step: 292/77, loss: 0.0012233004672452807 2023-01-22 
08:37:17.948562: step: 296/77, loss: 4.289309435989708e-05 2023-01-22 08:37:19.252450: step: 300/77, loss: 2.827224670909345e-05 2023-01-22 08:37:20.584938: step: 304/77, loss: 0.0011272351257503033 2023-01-22 08:37:21.835439: step: 308/77, loss: 7.997609827725682e-06 2023-01-22 08:37:23.104277: step: 312/77, loss: 0.007476623170077801 2023-01-22 08:37:24.410020: step: 316/77, loss: 0.00041830280679278076 2023-01-22 08:37:25.743643: step: 320/77, loss: 3.9598311559529975e-05 2023-01-22 08:37:27.054462: step: 324/77, loss: 3.4866736768890405e-06 2023-01-22 08:37:28.323074: step: 328/77, loss: 0.011931387707591057 2023-01-22 08:37:29.607249: step: 332/77, loss: 8.82970925886184e-06 2023-01-22 08:37:30.915064: step: 336/77, loss: 0.021589653566479683 2023-01-22 08:37:32.199121: step: 340/77, loss: 0.00010848701640497893 2023-01-22 08:37:33.496899: step: 344/77, loss: 0.00011941129196202382 2023-01-22 08:37:34.789771: step: 348/77, loss: 0.00019851088291034102 2023-01-22 08:37:36.084196: step: 352/77, loss: 0.0003793227078858763 2023-01-22 08:37:37.427233: step: 356/77, loss: 2.4331914119102294e-06 2023-01-22 08:37:38.724366: step: 360/77, loss: 1.367283402942121e-05 2023-01-22 08:37:40.051993: step: 364/77, loss: 0.00044953133328817785 2023-01-22 08:37:41.323838: step: 368/77, loss: 0.003973706159740686 2023-01-22 08:37:42.621546: step: 372/77, loss: 0.00062417215667665 2023-01-22 08:37:43.905363: step: 376/77, loss: 0.014592466875910759 2023-01-22 08:37:45.184351: step: 380/77, loss: 1.2157357559772208e-05 2023-01-22 08:37:46.474430: step: 384/77, loss: 0.012784970924258232 2023-01-22 08:37:47.771190: step: 388/77, loss: 0.00866780523210764 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 20} Test Chinese: {'template': {'p': 0.9210526315789473, 'r': 0.5511811023622047, 'f1': 0.6896551724137933}, 'slot': {'p': 0.5227272727272727, 'r': 0.019896193771626297, 'f1': 0.03833333333333333}, 'combined': 0.026436781609195405, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 20} Test Korean: {'template': {'p': 0.9210526315789473, 'r': 0.5511811023622047, 'f1': 0.6896551724137933}, 'slot': {'p': 0.5227272727272727, 'r': 0.019896193771626297, 'f1': 0.03833333333333333}, 'combined': 0.026436781609195405, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 20} Test Russian: {'template': {'p': 0.922077922077922, 'r': 0.5590551181102362, 'f1': 0.696078431372549}, 'slot': {'p': 0.5227272727272727, 'r': 0.019896193771626297, 'f1': 0.03833333333333333}, 'combined': 0.02668300653594771, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 
'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 21 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:39:31.225519: step: 4/77, loss: 7.048082579785842e-07 2023-01-22 08:39:32.495981: step: 8/77, loss: 0.03427944704890251 2023-01-22 08:39:33.811417: step: 12/77, loss: 7.991908205440268e-05 2023-01-22 08:39:35.089540: step: 16/77, loss: 3.760176332434639e-05 2023-01-22 08:39:36.394305: step: 20/77, loss: 0.02467663213610649 2023-01-22 08:39:37.730175: step: 24/77, loss: 0.10356248170137405 2023-01-22 08:39:38.976592: step: 28/77, loss: 0.022828515619039536 2023-01-22 08:39:40.267889: step: 32/77, loss: 0.0007306609186343849 2023-01-22 08:39:41.595557: step: 36/77, loss: 0.00021144589118193835 2023-01-22 08:39:42.915846: step: 40/77, loss: 5.7149991334881634e-05 2023-01-22 08:39:44.224813: step: 44/77, loss: 0.00032232870580628514 2023-01-22 08:39:45.530866: step: 48/77, loss: 0.029408568516373634 2023-01-22 08:39:46.884887: step: 52/77, loss: 0.06268583983182907 2023-01-22 08:39:48.201648: step: 56/77, loss: 0.0031255376525223255 2023-01-22 08:39:49.465624: step: 60/77, loss: 0.003057542722672224 2023-01-22 08:39:50.772916: step: 64/77, loss: 0.004814974498003721 2023-01-22 08:39:52.045224: step: 68/77, loss: 2.538320404710248e-05 2023-01-22 08:39:53.359360: step: 72/77, loss: 0.0009136873995885253 2023-01-22 08:39:54.694337: step: 
76/77, loss: 0.0011968818726018071 2023-01-22 08:39:55.984479: step: 80/77, loss: 0.0003349235048517585 2023-01-22 08:39:57.318034: step: 84/77, loss: 0.06473742425441742 2023-01-22 08:39:58.596718: step: 88/77, loss: 0.00011328059918014333 2023-01-22 08:39:59.931892: step: 92/77, loss: 0.02154296264052391 2023-01-22 08:40:01.235680: step: 96/77, loss: 0.01184603851288557 2023-01-22 08:40:02.527375: step: 100/77, loss: 0.0021092176903039217 2023-01-22 08:40:03.857442: step: 104/77, loss: 2.473575477779377e-07 2023-01-22 08:40:05.116247: step: 108/77, loss: 4.977339995093644e-05 2023-01-22 08:40:06.427963: step: 112/77, loss: 0.00016734329983592033 2023-01-22 08:40:07.696617: step: 116/77, loss: 1.490108871848861e-07 2023-01-22 08:40:09.010791: step: 120/77, loss: 0.0003039098810404539 2023-01-22 08:40:10.282152: step: 124/77, loss: 2.13262319448404e-05 2023-01-22 08:40:11.587880: step: 128/77, loss: 2.0595960450009443e-05 2023-01-22 08:40:12.895099: step: 132/77, loss: 1.0728827248840389e-07 2023-01-22 08:40:14.176080: step: 136/77, loss: 2.98022992950564e-08 2023-01-22 08:40:15.496208: step: 140/77, loss: 2.5778922463359777e-07 2023-01-22 08:40:16.835618: step: 144/77, loss: 0.0001362333568977192 2023-01-22 08:40:18.092242: step: 148/77, loss: 0.008969240821897984 2023-01-22 08:40:19.393023: step: 152/77, loss: 3.102040636804304e-06 2023-01-22 08:40:20.731418: step: 156/77, loss: 0.02981063537299633 2023-01-22 08:40:22.029477: step: 160/77, loss: 0.00039904689765535295 2023-01-22 08:40:23.331332: step: 164/77, loss: 1.2789114407496527e-05 2023-01-22 08:40:24.644248: step: 168/77, loss: 1.2550574865599629e-05 2023-01-22 08:40:25.932795: step: 172/77, loss: 0.004511318635195494 2023-01-22 08:40:27.226470: step: 176/77, loss: 0.003428037278354168 2023-01-22 08:40:28.544384: step: 180/77, loss: 6.116942586231744e-06 2023-01-22 08:40:29.822046: step: 184/77, loss: 0.005304019898176193 2023-01-22 08:40:31.183298: step: 188/77, loss: 0.0006755455979146063 2023-01-22 08:40:32.451465: step: 192/77, loss: 0.0015934238908812404 2023-01-22 08:40:33.735288: step: 196/77, loss: 1.053499090630794e-06 2023-01-22 08:40:35.033356: step: 200/77, loss: 4.461091521079652e-06 2023-01-22 08:40:36.355058: step: 204/77, loss: 5.4923650168348104e-05 2023-01-22 08:40:37.656152: step: 208/77, loss: 0.00013085010868962854 2023-01-22 08:40:38.941894: step: 212/77, loss: 0.016220975667238235 2023-01-22 08:40:40.257110: step: 216/77, loss: 6.899174422869692e-07 2023-01-22 08:40:41.542486: step: 220/77, loss: 0.014226194471120834 2023-01-22 08:40:42.846989: step: 224/77, loss: 6.874488917674171e-06 2023-01-22 08:40:44.172650: step: 228/77, loss: 4.1871919620461995e-07 2023-01-22 08:40:45.475172: step: 232/77, loss: 6.766220758436248e-05 2023-01-22 08:40:46.763664: step: 236/77, loss: 4.716946932603605e-06 2023-01-22 08:40:48.075662: step: 240/77, loss: 1.7191203369293362e-05 2023-01-22 08:40:49.353528: step: 244/77, loss: 0.0005216790596023202 2023-01-22 08:40:50.633809: step: 248/77, loss: 0.0007348595536313951 2023-01-22 08:40:51.861552: step: 252/77, loss: 6.87366773490794e-05 2023-01-22 08:40:53.188105: step: 256/77, loss: 1.0489097803656477e-05 2023-01-22 08:40:54.450785: step: 260/77, loss: 0.011000293307006359 2023-01-22 08:40:55.774768: step: 264/77, loss: 0.007235467433929443 2023-01-22 08:40:57.040291: step: 268/77, loss: 0.0007028059335425496 2023-01-22 08:40:58.308544: step: 272/77, loss: 4.366143548395485e-05 2023-01-22 08:40:59.577619: step: 276/77, loss: 0.0013454877771437168 2023-01-22 08:41:00.863810: 
step: 280/77, loss: 6.603048677789047e-05 2023-01-22 08:41:02.153692: step: 284/77, loss: 0.02493387833237648 2023-01-22 08:41:03.416989: step: 288/77, loss: 0.0004930190043523908 2023-01-22 08:41:04.699640: step: 292/77, loss: 0.0006468009087257087 2023-01-22 08:41:05.994622: step: 296/77, loss: 0.012403626926243305 2023-01-22 08:41:07.294566: step: 300/77, loss: 0.001801351085305214 2023-01-22 08:41:08.594256: step: 304/77, loss: 2.3860138753661886e-05 2023-01-22 08:41:09.892310: step: 308/77, loss: 0.0017166226170957088 2023-01-22 08:41:11.209834: step: 312/77, loss: 0.0009119111928157508 2023-01-22 08:41:12.562447: step: 316/77, loss: 0.006388116627931595 2023-01-22 08:41:13.863179: step: 320/77, loss: 0.026480983942747116 2023-01-22 08:41:15.128407: step: 324/77, loss: 2.3476293790736236e-05 2023-01-22 08:41:16.387345: step: 328/77, loss: 0.0002862987748812884 2023-01-22 08:41:17.636066: step: 332/77, loss: 2.0965535441064276e-06 2023-01-22 08:41:18.959193: step: 336/77, loss: 1.5079215245350497e-06 2023-01-22 08:41:20.250958: step: 340/77, loss: 7.212046853055654e-07 2023-01-22 08:41:21.526445: step: 344/77, loss: 0.07714388519525528 2023-01-22 08:41:22.831787: step: 348/77, loss: 8.724490908207372e-05 2023-01-22 08:41:24.141621: step: 352/77, loss: 3.0321880331030115e-06 2023-01-22 08:41:25.485013: step: 356/77, loss: 0.00028479041066020727 2023-01-22 08:41:26.787605: step: 360/77, loss: 4.458204421098344e-05 2023-01-22 08:41:28.019701: step: 364/77, loss: 7.021379860816523e-05 2023-01-22 08:41:29.313383: step: 368/77, loss: 0.04699565842747688 2023-01-22 08:41:30.601014: step: 372/77, loss: 0.00013077085895929486 2023-01-22 08:41:31.912741: step: 376/77, loss: 2.8759140491274593e-07 2023-01-22 08:41:33.214238: step: 380/77, loss: 0.00011267088848398998 2023-01-22 08:41:34.550836: step: 384/77, loss: 0.01159019023180008 2023-01-22 08:41:35.814533: step: 388/77, loss: 1.93859477803926e-06 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Chinese: {'template': {'p': 0.9113924050632911, 'r': 0.5669291338582677, 'f1': 0.6990291262135924}, 'slot': {'p': 0.5365853658536586, 'r': 0.01903114186851211, 'f1': 0.036758563074352546}, 'combined': 0.02569530622673188, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Korean: {'template': {'p': 0.9102564102564102, 'r': 0.5590551181102362, 'f1': 0.6926829268292682}, 'slot': {'p': 0.5384615384615384, 'r': 0.018166089965397925, 'f1': 0.035146443514644354}, 'combined': 0.024345341361363404, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Russian: {'template': {'p': 0.9090909090909091, 'r': 0.5511811023622047, 'f1': 0.6862745098039216}, 'slot': {'p': 0.575, 'r': 0.019896193771626297, 'f1': 0.03846153846153846}, 'combined': 0.02639517345399698, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 21} Sample Korean: 
{'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 22 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:43:19.145698: step: 4/77, loss: 0.00034515286097303033 2023-01-22 08:43:20.476735: step: 8/77, loss: 0.00033434914075769484 2023-01-22 08:43:21.783080: step: 12/77, loss: 9.424352901987731e-05 2023-01-22 08:43:23.086005: step: 16/77, loss: 1.2516940728346526e-07 2023-01-22 08:43:24.380466: step: 20/77, loss: 0.0005243119667284191 2023-01-22 08:43:25.596546: step: 24/77, loss: 1.5395658920169808e-05 2023-01-22 08:43:26.868859: step: 28/77, loss: 2.3789565602783114e-05 2023-01-22 08:43:28.159002: step: 32/77, loss: 0.025939157232642174 2023-01-22 08:43:29.470479: step: 36/77, loss: 1.0877823797272868e-07 2023-01-22 08:43:30.780401: step: 40/77, loss: 9.988000783778261e-06 2023-01-22 08:43:32.062128: step: 44/77, loss: 4.319598701840732e-06 2023-01-22 08:43:33.357096: step: 48/77, loss: 6.57673372188583e-05 2023-01-22 08:43:34.610559: step: 52/77, loss: 6.154135121505533e-07 2023-01-22 08:43:35.910289: step: 56/77, loss: 4.635993536794558e-05 2023-01-22 08:43:37.195219: step: 
60/77, loss: 0.00046627374831587076 2023-01-22 08:43:38.424164: step: 64/77, loss: 0.03233442083001137 2023-01-22 08:43:39.741253: step: 68/77, loss: 2.6955527573591098e-05 2023-01-22 08:43:40.999454: step: 72/77, loss: 0.00029844618984498084 2023-01-22 08:43:42.259552: step: 76/77, loss: 5.438885750663758e-07 2023-01-22 08:43:43.568157: step: 80/77, loss: 4.504471144173294e-05 2023-01-22 08:43:44.872400: step: 84/77, loss: 5.6063145166262984e-05 2023-01-22 08:43:46.192919: step: 88/77, loss: 0.00044191296910867095 2023-01-22 08:43:47.517156: step: 92/77, loss: 1.559289012220688e-05 2023-01-22 08:43:48.789138: step: 96/77, loss: 3.65998967026826e-05 2023-01-22 08:43:50.060840: step: 100/77, loss: 0.0015643913066014647 2023-01-22 08:43:51.393401: step: 104/77, loss: 0.0005108561599627137 2023-01-22 08:43:52.713405: step: 108/77, loss: 3.702392859850079e-05 2023-01-22 08:43:54.029297: step: 112/77, loss: 1.5779487512190826e-06 2023-01-22 08:43:55.281760: step: 116/77, loss: 8.033843187149614e-06 2023-01-22 08:43:56.597980: step: 120/77, loss: 0.00030644662911072373 2023-01-22 08:43:57.883218: step: 124/77, loss: 0.049470219761133194 2023-01-22 08:43:59.161238: step: 128/77, loss: 4.3808847749460256e-07 2023-01-22 08:44:00.442244: step: 132/77, loss: 1.0594386594675598e-06 2023-01-22 08:44:01.705696: step: 136/77, loss: 6.492507236544043e-05 2023-01-22 08:44:02.982957: step: 140/77, loss: 4.559675232940208e-07 2023-01-22 08:44:04.286813: step: 144/77, loss: 0.0018354627536609769 2023-01-22 08:44:05.580905: step: 148/77, loss: 0.01070198230445385 2023-01-22 08:44:06.876125: step: 152/77, loss: 0.03983582183718681 2023-01-22 08:44:08.188647: step: 156/77, loss: 0.03791589289903641 2023-01-22 08:44:09.504036: step: 160/77, loss: 8.679709026182536e-06 2023-01-22 08:44:10.793840: step: 164/77, loss: 0.004185084719210863 2023-01-22 08:44:12.095788: step: 168/77, loss: 1.7537532812639256e-06 2023-01-22 08:44:13.380229: step: 172/77, loss: 0.00013599536032415926 2023-01-22 08:44:14.700229: step: 176/77, loss: 0.019619328901171684 2023-01-22 08:44:16.033219: step: 180/77, loss: 1.1060351425840054e-05 2023-01-22 08:44:17.345464: step: 184/77, loss: 2.2942162104300223e-05 2023-01-22 08:44:18.634396: step: 188/77, loss: 0.0002040996914729476 2023-01-22 08:44:19.948723: step: 192/77, loss: 0.003156597726047039 2023-01-22 08:44:21.236256: step: 196/77, loss: 1.1533255701579037e-06 2023-01-22 08:44:22.480601: step: 200/77, loss: 0.00010490609565749764 2023-01-22 08:44:23.737479: step: 204/77, loss: 4.532407729129773e-06 2023-01-22 08:44:25.066586: step: 208/77, loss: 0.005407263059169054 2023-01-22 08:44:26.356179: step: 212/77, loss: 3.213984200556297e-06 2023-01-22 08:44:27.669359: step: 216/77, loss: 0.0004152047913521528 2023-01-22 08:44:29.028554: step: 220/77, loss: 0.00022536289179697633 2023-01-22 08:44:30.301392: step: 224/77, loss: 0.053443796932697296 2023-01-22 08:44:31.567613: step: 228/77, loss: 0.08764869719743729 2023-01-22 08:44:32.863171: step: 232/77, loss: 0.007128824945539236 2023-01-22 08:44:34.182179: step: 236/77, loss: 0.02799457125365734 2023-01-22 08:44:35.431514: step: 240/77, loss: 2.533893712097779e-05 2023-01-22 08:44:36.678218: step: 244/77, loss: 2.8621703677345067e-05 2023-01-22 08:44:37.977589: step: 248/77, loss: 3.5507250686350744e-06 2023-01-22 08:44:39.273195: step: 252/77, loss: 0.0003244962135795504 2023-01-22 08:44:40.590836: step: 256/77, loss: 5.079111360828392e-05 2023-01-22 08:44:41.874822: step: 260/77, loss: 7.101958999555791e-06 2023-01-22 08:44:43.159531: 
step: 264/77, loss: 4.6427543566096574e-05 2023-01-22 08:44:44.452077: step: 268/77, loss: 0.0014632672537118196 2023-01-22 08:44:45.788086: step: 272/77, loss: 4.038164718167536e-07 2023-01-22 08:44:47.106696: step: 276/77, loss: 0.006101526785641909 2023-01-22 08:44:48.410522: step: 280/77, loss: 0.0002811462036333978 2023-01-22 08:44:49.717700: step: 284/77, loss: 1.0437474884383846e-05 2023-01-22 08:44:51.000868: step: 288/77, loss: 1.8505952539271675e-06 2023-01-22 08:44:52.306264: step: 292/77, loss: 0.0013959399657323956 2023-01-22 08:44:53.621114: step: 296/77, loss: 0.017746856436133385 2023-01-22 08:44:54.898125: step: 300/77, loss: 0.00010268734331475571 2023-01-22 08:44:56.195381: step: 304/77, loss: 3.69758672604803e-05 2023-01-22 08:44:57.468213: step: 308/77, loss: 0.014934529550373554 2023-01-22 08:44:58.808841: step: 312/77, loss: 4.276600407138176e-07 2023-01-22 08:45:00.101588: step: 316/77, loss: 0.0001311105879722163 2023-01-22 08:45:01.359876: step: 320/77, loss: 5.811410801470629e-07 2023-01-22 08:45:02.681812: step: 324/77, loss: 0.0001222739228978753 2023-01-22 08:45:04.009512: step: 328/77, loss: 0.002665694570168853 2023-01-22 08:45:05.315086: step: 332/77, loss: 1.1662889846775215e-05 2023-01-22 08:45:06.631491: step: 336/77, loss: 4.84055954075302e-06 2023-01-22 08:45:07.955141: step: 340/77, loss: 0.09958529472351074 2023-01-22 08:45:09.268616: step: 344/77, loss: 9.598407814337406e-06 2023-01-22 08:45:10.603206: step: 348/77, loss: 2.340669379918836e-05 2023-01-22 08:45:11.915057: step: 352/77, loss: 4.857738531427458e-07 2023-01-22 08:45:13.209560: step: 356/77, loss: 2.4492983357049525e-05 2023-01-22 08:45:14.525369: step: 360/77, loss: 3.918967763638648e-07 2023-01-22 08:45:15.837943: step: 364/77, loss: 1.9481307390378788e-05 2023-01-22 08:45:17.152649: step: 368/77, loss: 2.6498250008444302e-05 2023-01-22 08:45:18.477996: step: 372/77, loss: 1.524307322142704e-06 2023-01-22 08:45:19.828052: step: 376/77, loss: 0.0005987274344079196 2023-01-22 08:45:21.136490: step: 380/77, loss: 0.0001938179339049384 2023-01-22 08:45:22.487788: step: 384/77, loss: 0.1129758358001709 2023-01-22 08:45:23.795718: step: 388/77, loss: 3.8423590012826025e-05 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Chinese: {'template': {'p': 0.9090909090909091, 'r': 0.5511811023622047, 'f1': 0.6862745098039216}, 'slot': {'p': 0.55, 'r': 0.01903114186851211, 'f1': 0.03678929765886288}, 'combined': 0.02524755721686668, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Korean: {'template': {'p': 0.9090909090909091, 'r': 0.5511811023622047, 'f1': 0.6862745098039216}, 'slot': {'p': 0.5121951219512195, 'r': 0.018166089965397925, 'f1': 0.03508771929824562}, 'combined': 0.02407980736154111, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Russian: {'template': {'p': 0.9090909090909091, 'r': 0.5511811023622047, 'f1': 0.6862745098039216}, 'slot': {'p': 0.5, 'r': 0.01730103806228374, 'f1': 
0.033444816053511704}, 'combined': 0.02295232474260607, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 23 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:47:07.212419: step: 4/77, loss: 9.337160736322403e-05 2023-01-22 08:47:08.476270: step: 8/77, loss: 8.200939191738144e-06 2023-01-22 08:47:09.793349: step: 12/77, loss: 3.47598006555927e-06 2023-01-22 08:47:11.047309: step: 16/77, loss: 0.0028689149767160416 2023-01-22 08:47:12.312189: step: 20/77, loss: 0.0015908610075712204 2023-01-22 08:47:13.618523: step: 24/77, loss: 0.01427928265184164 2023-01-22 08:47:14.933503: step: 28/77, loss: 1.1142165021738037e-05 2023-01-22 08:47:16.201381: step: 32/77, loss: 0.0005018312949687243 2023-01-22 08:47:17.506470: step: 36/77, loss: 0.02262580208480358 2023-01-22 08:47:18.782884: step: 40/77, loss: 5.2290601161075756e-05 2023-01-22 08:47:20.129404: step: 
44/77, loss: 4.255419844412245e-05 2023-01-22 08:47:21.409144: step: 48/77, loss: 7.872828427935019e-05 2023-01-22 08:47:22.693968: step: 52/77, loss: 0.0005638409056700766 2023-01-22 08:47:24.014219: step: 56/77, loss: 0.0003538989694789052 2023-01-22 08:47:25.298763: step: 60/77, loss: 0.0007779510924592614 2023-01-22 08:47:26.615816: step: 64/77, loss: 3.0188837172318017e-06 2023-01-22 08:47:27.936927: step: 68/77, loss: 0.022698700428009033 2023-01-22 08:47:29.300953: step: 72/77, loss: 0.006691284477710724 2023-01-22 08:47:30.602139: step: 76/77, loss: 0.0014096861705183983 2023-01-22 08:47:31.889243: step: 80/77, loss: 0.13911856710910797 2023-01-22 08:47:33.190126: step: 84/77, loss: 1.3243750800029375e-05 2023-01-22 08:47:34.513833: step: 88/77, loss: 0.0006453330861404538 2023-01-22 08:47:35.798959: step: 92/77, loss: 2.8783524612663314e-05 2023-01-22 08:47:37.087429: step: 96/77, loss: 0.04625452682375908 2023-01-22 08:47:38.400851: step: 100/77, loss: 4.312382225180045e-05 2023-01-22 08:47:39.714432: step: 104/77, loss: 0.000414960813941434 2023-01-22 08:47:41.016675: step: 108/77, loss: 3.2782541126152864e-08 2023-01-22 08:47:42.284846: step: 112/77, loss: 1.6369896911783144e-05 2023-01-22 08:47:43.614200: step: 116/77, loss: 1.0430801467009587e-07 2023-01-22 08:47:44.920035: step: 120/77, loss: 0.0005855010822415352 2023-01-22 08:47:46.195965: step: 124/77, loss: 0.00018316449131816626 2023-01-22 08:47:47.483596: step: 128/77, loss: 0.17900878190994263 2023-01-22 08:47:48.761066: step: 132/77, loss: 7.727268894086592e-06 2023-01-22 08:47:50.079105: step: 136/77, loss: 0.0010186383733525872 2023-01-22 08:47:51.340785: step: 140/77, loss: 9.662757656769827e-05 2023-01-22 08:47:52.591639: step: 144/77, loss: 2.028022436206811e-06 2023-01-22 08:47:53.877268: step: 148/77, loss: 1.0520020623516757e-06 2023-01-22 08:47:55.149224: step: 152/77, loss: 0.0005013812333345413 2023-01-22 08:47:56.431879: step: 156/77, loss: 0.00014438100333791226 2023-01-22 08:47:57.761895: step: 160/77, loss: 8.661092579131946e-06 2023-01-22 08:47:59.100737: step: 164/77, loss: 0.007426140364259481 2023-01-22 08:48:00.370022: step: 168/77, loss: 0.00012616364983841777 2023-01-22 08:48:01.691917: step: 172/77, loss: 0.030256465077400208 2023-01-22 08:48:03.017479: step: 176/77, loss: 3.499081140034832e-05 2023-01-22 08:48:04.325485: step: 180/77, loss: 0.0002845051931217313 2023-01-22 08:48:05.635853: step: 184/77, loss: 0.0301145538687706 2023-01-22 08:48:06.905891: step: 188/77, loss: 0.02152554877102375 2023-01-22 08:48:08.195906: step: 192/77, loss: 0.00019312337099108845 2023-01-22 08:48:09.524377: step: 196/77, loss: 0.0016593344043940306 2023-01-22 08:48:10.797450: step: 200/77, loss: 0.020302653312683105 2023-01-22 08:48:12.101284: step: 204/77, loss: 9.856343967840075e-05 2023-01-22 08:48:13.416922: step: 208/77, loss: 1.2278114809305407e-06 2023-01-22 08:48:14.680895: step: 212/77, loss: 0.05086364597082138 2023-01-22 08:48:15.961340: step: 216/77, loss: 1.1684308447001968e-05 2023-01-22 08:48:17.240235: step: 220/77, loss: 0.019000614061951637 2023-01-22 08:48:18.549540: step: 224/77, loss: 0.001930738682858646 2023-01-22 08:48:19.845148: step: 228/77, loss: 0.0012907341588288546 2023-01-22 08:48:21.209011: step: 232/77, loss: 1.937150528874554e-08 2023-01-22 08:48:22.481982: step: 236/77, loss: 4.7083499339350965e-06 2023-01-22 08:48:23.782862: step: 240/77, loss: 0.0022569689899683 2023-01-22 08:48:25.059948: step: 244/77, loss: 5.0357443797111046e-06 2023-01-22 08:48:26.356920: step: 248/77, 
loss: 1.2719761798507534e-05 2023-01-22 08:48:27.631331: step: 252/77, loss: 0.002375055104494095 2023-01-22 08:48:28.951984: step: 256/77, loss: 6.614408448513132e-06 2023-01-22 08:48:30.243446: step: 260/77, loss: 0.015899265184998512 2023-01-22 08:48:31.517072: step: 264/77, loss: 0.0006511949468404055 2023-01-22 08:48:32.787903: step: 268/77, loss: 9.760146895132493e-07 2023-01-22 08:48:34.065052: step: 272/77, loss: 0.004656706005334854 2023-01-22 08:48:35.400826: step: 276/77, loss: 1.0758431017166004e-06 2023-01-22 08:48:36.701784: step: 280/77, loss: 4.061367144458927e-05 2023-01-22 08:48:37.951483: step: 284/77, loss: 4.6537832531612366e-05 2023-01-22 08:48:39.253482: step: 288/77, loss: 0.0011781684588640928 2023-01-22 08:48:40.540005: step: 292/77, loss: 8.895739301806316e-05 2023-01-22 08:48:41.873281: step: 296/77, loss: 0.0001069565478246659 2023-01-22 08:48:43.149414: step: 300/77, loss: 0.006709199398756027 2023-01-22 08:48:44.407489: step: 304/77, loss: 0.044382717460393906 2023-01-22 08:48:45.646777: step: 308/77, loss: 0.0005866599385626614 2023-01-22 08:48:46.946553: step: 312/77, loss: 0.00019547372357919812 2023-01-22 08:48:48.220766: step: 316/77, loss: 1.966942875242239e-07 2023-01-22 08:48:49.519351: step: 320/77, loss: 0.016215059906244278 2023-01-22 08:48:50.800752: step: 324/77, loss: 1.3859395039617084e-05 2023-01-22 08:48:52.122194: step: 328/77, loss: 0.005409374833106995 2023-01-22 08:48:53.420495: step: 332/77, loss: 0.02341928333044052 2023-01-22 08:48:54.710793: step: 336/77, loss: 1.094659910449991e-05 2023-01-22 08:48:56.022368: step: 340/77, loss: 5.960464122267695e-09 2023-01-22 08:48:57.337763: step: 344/77, loss: 3.83093856726191e-06 2023-01-22 08:48:58.682356: step: 348/77, loss: 0.008367887698113918 2023-01-22 08:48:59.931613: step: 352/77, loss: 5.503306510945549e-06 2023-01-22 08:49:01.245333: step: 356/77, loss: 1.6103691450553015e-05 2023-01-22 08:49:02.554035: step: 360/77, loss: 0.07554967701435089 2023-01-22 08:49:03.823577: step: 364/77, loss: 4.172321865780759e-08 2023-01-22 08:49:05.094402: step: 368/77, loss: 0.00906360149383545 2023-01-22 08:49:06.376810: step: 372/77, loss: 1.6774265532149002e-05 2023-01-22 08:49:07.645579: step: 376/77, loss: 0.03743258863687515 2023-01-22 08:49:08.965618: step: 380/77, loss: 5.318142211763188e-05 2023-01-22 08:49:10.226615: step: 384/77, loss: 0.01306243147701025 2023-01-22 08:49:11.518202: step: 388/77, loss: 9.19883168535307e-05 ================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Chinese: {'template': {'p': 0.9367088607594937, 'r': 0.5826771653543307, 'f1': 0.7184466019417475}, 'slot': {'p': 0.5897435897435898, 'r': 0.019896193771626297, 'f1': 0.03849372384937238}, 'combined': 0.02765568509566559, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Korean: {'template': {'p': 0.9367088607594937, 'r': 0.5826771653543307, 'f1': 0.7184466019417475}, 'slot': {'p': 0.5897435897435898, 'r': 0.019896193771626297, 'f1': 0.03849372384937238}, 'combined': 0.02765568509566559, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 
'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Russian: {'template': {'p': 0.9367088607594937, 'r': 0.5826771653543307, 'f1': 0.7184466019417475}, 'slot': {'p': 0.5897435897435898, 'r': 0.019896193771626297, 'f1': 0.03849372384937238}, 'combined': 0.02765568509566559, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 24 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:50:54.866348: step: 4/77, loss: 0.00380399776622653 2023-01-22 08:50:56.211086: step: 8/77, loss: 3.5082361137028784e-05 2023-01-22 08:50:57.498098: step: 12/77, loss: 1.9221988623030484e-06 2023-01-22 08:50:58.797158: step: 16/77, loss: 0.015467840246856213 2023-01-22 08:51:00.068506: step: 20/77, loss: 8.227418584283441e-05 2023-01-22 08:51:01.353678: step: 24/77, loss: 4.65135326521704e-06 2023-01-22 08:51:02.594460: step: 28/77, 
******************************
Epoch: 24
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-22 08:50:54.866348: step: 4/77, loss: 0.00380399776622653
2023-01-22 08:50:56.211086: step: 8/77, loss: 3.5082361137028784e-05
2023-01-22 08:50:57.498098: step: 12/77, loss: 1.9221988623030484e-06
2023-01-22 08:50:58.797158: step: 16/77, loss: 0.015467840246856213
2023-01-22 08:51:00.068506: step: 20/77, loss: 8.227418584283441e-05
2023-01-22 08:51:01.353678: step: 24/77, loss: 4.65135326521704e-06
2023-01-22 08:51:02.594460: step: 28/77, loss: 0.0009193022851832211
2023-01-22 08:51:03.889349: step: 32/77, loss: 6.777806447644252e-06
2023-01-22 08:51:05.169702: step: 36/77, loss: 0.0033016628585755825
2023-01-22 08:51:06.518179: step: 40/77, loss: 0.0022671720944344997
2023-01-22 08:51:07.818516: step: 44/77, loss: 1.921259354276117e-05
2023-01-22 08:51:09.116694: step: 48/77, loss: 6.093090632930398e-06
2023-01-22 08:51:10.390714: step: 52/77, loss: 1.2281492672627792e-05
2023-01-22 08:51:11.683571: step: 56/77, loss: 0.0006353338831104338
2023-01-22 08:51:12.976370: step: 60/77, loss: 2.990082612086553e-05
2023-01-22 08:51:14.225167: step: 64/77, loss: 0.027835655957460403
2023-01-22 08:51:15.520501: step: 68/77, loss: 1.7180659597215708e-06
2023-01-22 08:51:16.803905: step: 72/77, loss: 0.052142348140478134
2023-01-22 08:51:18.105370: step: 76/77, loss: 2.086162353975851e-08
2023-01-22 08:51:19.367459: step: 80/77, loss: 9.739868255564943e-05
2023-01-22 08:51:20.646236: step: 84/77, loss: 9.849472917267121e-07
2023-01-22 08:51:21.911487: step: 88/77, loss: 2.7464941013022326e-05
2023-01-22 08:51:23.187613: step: 92/77, loss: 0.007449309341609478
2023-01-22 08:51:24.482656: step: 96/77, loss: 7.262600411195308e-05
2023-01-22 08:51:25.774604: step: 100/77, loss: 1.0117654483110528e-06
2023-01-22 08:51:27.052213: step: 104/77, loss: 0.00021382025443017483
2023-01-22 08:51:28.382327: step: 108/77, loss: 1.078596142178867e-05
2023-01-22 08:51:29.693670: step: 112/77, loss: 8.99299448064994e-06
2023-01-22 08:51:30.982481: step: 116/77, loss: 1.023694949253695e-06
2023-01-22 08:51:32.250391: step: 120/77, loss: 2.2249520043260418e-05
2023-01-22 08:51:33.520427: step: 124/77, loss: 0.02711966075003147
2023-01-22 08:51:34.829152: step: 128/77, loss: 9.983756399378763e-08
2023-01-22 08:51:36.098984: step: 132/77, loss: 0.00011487719893921167
2023-01-22 08:51:37.399629: step: 136/77, loss: 0.013438182882964611
2023-01-22 08:51:38.687872: step: 140/77, loss: 0.017233524471521378
2023-01-22 08:51:39.957880: step: 144/77, loss: 3.7252888773764425e-08
2023-01-22 08:51:41.284390: step: 148/77, loss: 0.013860609382390976
2023-01-22 08:51:42.567662: step: 152/77, loss: 1.5586372228426626e-06
2023-01-22 08:51:43.890272: step: 156/77, loss: 1.7074664356186986e-05
2023-01-22 08:51:45.204122: step: 160/77, loss: 9.230232535628602e-05
2023-01-22 08:51:46.495755: step: 164/77, loss: 3.665661836294021e-07
2023-01-22 08:51:47.838348: step: 168/77, loss: 3.93847658415325e-05
2023-01-22 08:51:49.148132: step: 172/77, loss: 5.1080780394840986e-05
2023-01-22 08:51:50.488126: step: 176/77, loss: 0.006950449664145708
2023-01-22 08:51:51.781332: step: 180/77, loss: 1.3340352779778186e-05
2023-01-22 08:51:53.067705: step: 184/77, loss: 1.596692527527921e-05
2023-01-22 08:51:54.359748: step: 188/77, loss: 8.556472312193364e-05
2023-01-22 08:51:55.621926: step: 192/77, loss: 2.077104227282689e-06
2023-01-22 08:51:56.947736: step: 196/77, loss: 1.8318276488571428e-05
2023-01-22 08:51:58.230115: step: 200/77, loss: 0.015348915942013264
2023-01-22 08:51:59.554564: step: 204/77, loss: 5.960463678178485e-09
2023-01-22 08:52:00.835053: step: 208/77, loss: 0.001952013815753162
2023-01-22 08:52:02.128525: step: 212/77, loss: 0.021748293191194534
2023-01-22 08:52:03.374421: step: 216/77, loss: 1.0564673402768676e-06
2023-01-22 08:52:04.725997: step: 220/77, loss: 0.00024628365645185113
2023-01-22 08:52:06.002649: step: 224/77, loss: 0.018810153007507324
2023-01-22 08:52:07.318714: step: 228/77, loss: 0.07416260987520218
2023-01-22 08:52:08.573543: step: 232/77, loss: 0.001658955472521484
2023-01-22 08:52:09.860818: step: 236/77, loss: 1.947512600963819e-06
2023-01-22 08:52:11.194052: step: 240/77, loss: 0.0003093902487307787
2023-01-22 08:52:12.463941: step: 244/77, loss: 2.1571084289462306e-05
2023-01-22 08:52:13.737378: step: 248/77, loss: 0.03571411967277527
2023-01-22 08:52:15.035482: step: 252/77, loss: 1.9538061678758822e-05
2023-01-22 08:52:16.317642: step: 256/77, loss: 9.11098686628975e-06
2023-01-22 08:52:17.605599: step: 260/77, loss: 0.0015212477883324027
2023-01-22 08:52:18.892514: step: 264/77, loss: 2.640430466271937e-05
2023-01-22 08:52:20.190494: step: 268/77, loss: 3.8302790926536545e-05
2023-01-22 08:52:21.504439: step: 272/77, loss: 0.04266560077667236
2023-01-22 08:52:22.815677: step: 276/77, loss: 0.008817881345748901
2023-01-22 08:52:24.096253: step: 280/77, loss: 0.005077583249658346
2023-01-22 08:52:25.385474: step: 284/77, loss: 0.006172202993184328
2023-01-22 08:52:26.695589: step: 288/77, loss: 1.3753941857430618e-05
2023-01-22 08:52:28.017632: step: 292/77, loss: 2.6462712412467226e-06
2023-01-22 08:52:29.304572: step: 296/77, loss: 2.9802320611338473e-09
2023-01-22 08:52:30.567718: step: 300/77, loss: 0.01422153040766716
2023-01-22 08:52:31.872895: step: 304/77, loss: 0.07386370003223419
2023-01-22 08:52:33.164503: step: 308/77, loss: 1.993622390727978e-06
2023-01-22 08:52:34.420033: step: 312/77, loss: 0.0001149525196524337
2023-01-22 08:52:35.750119: step: 316/77, loss: 5.941071140114218e-05
2023-01-22 08:52:37.022701: step: 320/77, loss: 1.8775371302126587e-07
2023-01-22 08:52:38.290976: step: 324/77, loss: 0.0025986775290220976
2023-01-22 08:52:39.578144: step: 328/77, loss: 0.03418916463851929
2023-01-22 08:52:40.862314: step: 332/77, loss: 2.363630301260855e-05
2023-01-22 08:52:42.174546: step: 336/77, loss: 0.03347557410597801
2023-01-22 08:52:43.452830: step: 340/77, loss: 0.01019981037825346
2023-01-22 08:52:44.812452: step: 344/77, loss: 0.01401336770504713
2023-01-22 08:52:46.099550: step: 348/77, loss: 0.00014336804451886564
2023-01-22 08:52:47.428538: step: 352/77, loss: 0.008555217646062374
2023-01-22 08:52:48.692589: step: 356/77, loss: 0.008543770760297775
2023-01-22 08:52:49.976631: step: 360/77, loss: 0.007736127823591232
2023-01-22 08:52:51.256537: step: 364/77, loss: 0.005396129097789526
2023-01-22 08:52:52.501132: step: 368/77, loss: 0.0014105373993515968
2023-01-22 08:52:53.821953: step: 372/77, loss: 0.00011384568642824888
2023-01-22 08:52:55.153888: step: 376/77, loss: 0.00010466657113283873
2023-01-22 08:52:56.433792: step: 380/77, loss: 2.801337132041226e-06
2023-01-22 08:52:57.705608: step: 384/77, loss: 0.0005902171251364052
2023-01-22 08:52:58.960186: step: 388/77, loss: 0.00267789582721889
==================================================
Loss: 0.007
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 24}
Test Chinese: {'template': {'p': 0.9066666666666666, 'r': 0.5354330708661418, 'f1': 0.6732673267326733}, 'slot': {'p': 0.5405405405405406, 'r': 0.01730103806228374, 'f1': 0.03352891869237217}, 'combined': 0.022573925456250574, 'epoch': 24}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 24}
Test Korean: {'template': {'p': 0.9078947368421053, 'r': 0.5433070866141733, 'f1': 0.6798029556650248}, 'slot': {'p': 0.5263157894736842, 'r': 0.01730103806228374, 'f1': 0.03350083752093802}, 'combined': 0.02277396836398743, 'epoch': 24}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 24}
Test Russian: {'template': {'p': 0.9066666666666666, 'r': 0.5354330708661418, 'f1': 0.6732673267326733}, 'slot': {'p': 0.5277777777777778, 'r': 0.01643598615916955, 'f1': 0.031879194630872486}, 'combined': 0.02146322014751811, 'epoch': 24}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 24}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24}
Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2}
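Note: the per-step lines are regular enough to parse mechanically for post-hoc analysis. A sketch (the file name train.log and the helper name are assumptions, not part of the training script):

    import re

    STEP_RE = re.compile(
        r"(?P<ts>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{6}): "
        r"step: (?P<step>\d+)/\d+, loss: (?P<loss>[0-9.e+-]+)"
    )

    def iter_step_losses(path="train.log"):
        # Yields (timestamp, step, loss) for every step line in the log.
        with open(path) as fh:
            for line in fh:
                m = STEP_RE.search(line)
                if m:
                    yield m["ts"], int(m["step"]), float(m["loss"])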
******************************
Epoch: 25
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-22 08:54:38.564885: step: 4/77, loss: 7.5849900895264e-05
2023-01-22 08:54:39.819670: step: 8/77, loss: 0.0002651477698236704
2023-01-22 08:54:41.048910: step: 12/77, loss: 2.5703475330374204e-06
2023-01-22 08:54:42.325451: step: 16/77, loss: 0.0011636014096438885
2023-01-22 08:54:43.654980: step: 20/77, loss: 0.0013986665289849043
2023-01-22 08:54:44.965775: step: 24/77, loss: 2.3691984551987844e-06
2023-01-22 08:54:46.211108: step: 28/77, loss: 4.116630952921696e-05
2023-01-22 08:54:47.559610: step: 32/77, loss: 0.037189774215221405
2023-01-22 08:54:48.872998: step: 36/77, loss: 0.03908061981201172
2023-01-22 08:54:50.173283: step: 40/77, loss: 0.022363733500242233
2023-01-22 08:54:51.450497: step: 44/77, loss: 0.00549747608602047
2023-01-22 08:54:52.718443: step: 48/77, loss: 0.0003269324661232531
2023-01-22 08:54:54.013784: step: 52/77, loss: 0.0001640704576857388
2023-01-22 08:54:55.265076: step: 56/77, loss: 0.00024002409190870821
2023-01-22 08:54:56.501538: step: 60/77, loss: 0.0023460511583834887
2023-01-22 08:54:57.800893: step: 64/77, loss: 0.012938913889229298
2023-01-22 08:54:59.054397: step: 68/77, loss: 0.009058503434062004
2023-01-22 08:55:00.328275: step: 72/77, loss: 0.4155775308609009
2023-01-22 08:55:01.611274: step: 76/77, loss: 0.045352742075920105
2023-01-22 08:55:02.921057: step: 80/77, loss: 0.0008751029963605106
2023-01-22 08:55:04.190544: step: 84/77, loss: 1.10414327991748e-06
2023-01-22 08:55:05.475288: step: 88/77, loss: 0.00023450664593838155
2023-01-22 08:55:06.793213: step: 92/77, loss: 0.05811518058180809
2023-01-22 08:55:08.050401: step: 96/77, loss: 0.0003901486925315112
2023-01-22 08:55:09.331884: step: 100/77, loss: 0.00014174467651173472
2023-01-22 08:55:10.638590: step: 104/77, loss: 1.921330658660736e-05
2023-01-22 08:55:11.898227: step: 108/77, loss: 2.3113034330890514e-05
2023-01-22 08:55:13.202339: step: 112/77, loss: 1.6413274352089502e-05
2023-01-22 08:55:14.517177: step: 116/77, loss: 5.45346483704634e-06
2023-01-22 08:55:15.814747: step: 120/77, loss: 5.947385943727568e-06
2023-01-22 08:55:17.091452: step: 124/77, loss: 4.385063220979646e-06
2023-01-22 08:55:18.405034: step: 128/77, loss: 0.0008605056791566312
2023-01-22 08:55:19.699614: step: 132/77, loss: 0.0001738389692036435
2023-01-22 08:55:20.955933: step: 136/77, loss: 1.816932126530446e-05
2023-01-22 08:55:22.241412: step: 140/77, loss: 0.002159570576623082
2023-01-22 08:55:23.525348: step: 144/77, loss: 9.741610119817778e-05
2023-01-22 08:55:24.804082: step: 148/77, loss: 0.024993371218442917
2023-01-22 08:55:26.111976: step: 152/77, loss: 1.113360485760495e-05
2023-01-22 08:55:27.356920: step: 156/77, loss: 2.1446699975058436e-05
2023-01-22 08:55:28.681213: step: 160/77, loss: 0.00607384042814374
2023-01-22 08:55:29.969656: step: 164/77, loss: 0.00035711575765162706
2023-01-22 08:55:31.249953: step: 168/77, loss: 2.063775173155591e-05
2023-01-22 08:55:32.538938: step: 172/77, loss: 0.0007445422234013677
2023-01-22 08:55:33.835125: step: 176/77, loss: 0.021952372044324875
2023-01-22 08:55:35.149624: step: 180/77, loss: 4.619354498913708e-08
2023-01-22 08:55:36.462218: step: 184/77, loss: 8.031611287151463e-07
2023-01-22 08:55:37.724774: step: 188/77, loss: 1.2899691682832781e-05
2023-01-22 08:55:38.988954: step: 192/77, loss: 0.0002587594208307564
2023-01-22 08:55:40.270183: step: 196/77, loss: 1.0057990493805846e-06
2023-01-22 08:55:41.540260: step: 200/77, loss: 0.00792799610644579
2023-01-22 08:55:42.818941: step: 204/77, loss: 0.00010202324483543634
2023-01-22 08:55:44.065508: step: 208/77, loss: 0.0004384967323858291
2023-01-22 08:55:45.318859: step: 212/77, loss: 0.0008030325989238918
2023-01-22 08:55:46.560753: step: 216/77, loss: 0.2682453393936157
2023-01-22 08:55:47.861008: step: 220/77, loss: 0.005948225501924753
2023-01-22 08:55:49.158793: step: 224/77, loss: 3.5166578982170904e-07
2023-01-22 08:55:50.449185: step: 228/77, loss: 6.484018740593456e-06
2023-01-22 08:55:51.760833: step: 232/77, loss: 0.012864273972809315
2023-01-22 08:55:53.020680: step: 236/77, loss: 1.4885420114296721e-06
2023-01-22 08:55:54.281097: step: 240/77, loss: 0.0001642719144001603
2023-01-22 08:55:55.599750: step: 244/77, loss: 0.012073754332959652
2023-01-22 08:55:56.863964: step: 248/77, loss: 0.009761764667928219
2023-01-22 08:55:58.115066: step: 252/77, loss: 6.24136646365514e-06
2023-01-22 08:55:59.342655: step: 256/77, loss: 0.00013278864207677543
2023-01-22 08:56:00.650376: step: 260/77, loss: 0.0011594881070777774
2023-01-22 08:56:01.907224: step: 264/77, loss: 3.1124171073315665e-05
2023-01-22 08:56:03.210804: step: 268/77, loss: 0.0003066223580390215
2023-01-22 08:56:04.549382: step: 272/77, loss: 0.014135126024484634
2023-01-22 08:56:05.812093: step: 276/77, loss: 0.01993345282971859
2023-01-22 08:56:07.131670: step: 280/77, loss: 0.00028734607622027397
2023-01-22 08:56:08.431300: step: 284/77, loss: 0.006775941699743271
2023-01-22 08:56:09.702508: step: 288/77, loss: 0.00016594542830716819
2023-01-22 08:56:10.988704: step: 292/77, loss: 1.6688773030182347e-06
2023-01-22 08:56:12.276274: step: 296/77, loss: 2.0462706743273884e-05
2023-01-22 08:56:13.560290: step: 300/77, loss: 1.6956513491095393e-06
2023-01-22 08:56:14.847343: step: 304/77, loss: 0.00023198648705147207
2023-01-22 08:56:16.142835: step: 308/77, loss: 0.002622495638206601
2023-01-22 08:56:17.399825: step: 312/77, loss: 0.05106724798679352
2023-01-22 08:56:18.677832: step: 316/77, loss: 0.001140963053330779
2023-01-22 08:56:20.001315: step: 320/77, loss: 0.005684789270162582
2023-01-22 08:56:21.278210: step: 324/77, loss: 2.4211341951740906e-05
2023-01-22 08:56:22.512964: step: 328/77, loss: 7.808050668245414e-07
2023-01-22 08:56:23.796706: step: 332/77, loss: 0.019995568320155144
2023-01-22 08:56:25.097955: step: 336/77, loss: 3.076811117352918e-05
2023-01-22 08:56:26.361018: step: 340/77, loss: 2.486864832462743e-05
2023-01-22 08:56:27.662916: step: 344/77, loss: 9.378846880281344e-05
2023-01-22 08:56:28.970990: step: 348/77, loss: 0.0006190786371007562
2023-01-22 08:56:30.237443: step: 352/77, loss: 0.0008651064126752317
2023-01-22 08:56:31.541875: step: 356/77, loss: 0.00035393013968132436
2023-01-22 08:56:32.795072: step: 360/77, loss: 0.006651232950389385
2023-01-22 08:56:34.067305: step: 364/77, loss: 2.03934105229564e-05
2023-01-22 08:56:35.299078: step: 368/77, loss: 5.334555339686631e-07
2023-01-22 08:56:36.588902: step: 372/77, loss: 1.5221969078993425e-05
2023-01-22 08:56:37.814857: step: 376/77, loss: 0.0008514021174050868
2023-01-22 08:56:39.125577: step: 380/77, loss: 0.0064077554270625114
2023-01-22 08:56:40.384122: step: 384/77, loss: 6.021185254212469e-05
2023-01-22 08:56:41.695559: step: 388/77, loss: 0.04518267512321472
==================================================
Loss: 0.013
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25}
Test Chinese: {'template': {'p': 0.9358974358974359, 'r': 0.5748031496062992, 'f1': 0.7121951219512195}, 'slot': {'p': 0.6190476190476191, 'r': 0.02249134948096886, 'f1': 0.04340567612687813}, 'combined': 0.030913310802557107, 'epoch': 25}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25}
Test Korean: {'template': {'p': 0.9358974358974359, 'r': 0.5748031496062992, 'f1': 0.7121951219512195}, 'slot': {'p': 0.6190476190476191, 'r': 0.02249134948096886, 'f1': 0.04340567612687813}, 'combined': 0.030913310802557107, 'epoch': 25}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25}
Test Russian: {'template': {'p': 0.9358974358974359, 'r': 0.5748031496062992, 'f1': 0.7121951219512195}, 'slot': {'p': 0.6190476190476191, 'r': 0.02249134948096886, 'f1': 0.04340567612687813}, 'combined': 0.030913310802557107, 'epoch': 25}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 25}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 25}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2}
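Note: the step counter advances by 4 per logged line, consistent with logging once per optimizer update under --batch_size 10 and --accumulate_step 4 (an effective batch of 40). A generic gradient-accumulation loop illustrating that schedule (an illustration with toy data, not train.py itself):

    import torch

    model = torch.nn.Linear(8, 2)
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-4)
    batches = [(torch.randn(10, 8), torch.randint(0, 2, (10,))) for _ in range(388)]

    accumulate_step = 4
    optimizer.zero_grad()
    for i, (x, y) in enumerate(batches, start=1):
        # Scale so the accumulated gradient averages over the 4 micro-batches.
        loss = torch.nn.functional.cross_entropy(model(x), y) / accumulate_step
        loss.backward()
        if i % accumulate_step == 0:   # one update per 4 micro-batches
            optimizer.step()
            optimizer.zero_grad()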
******************************
Epoch: 26
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-22 08:58:21.438650: step: 4/77, loss: 1.244232748831564e-06
2023-01-22 08:58:22.742072: step: 8/77, loss: 0.01106318924576044
2023-01-22 08:58:24.014490: step: 12/77, loss: 1.7734229913912714e-05
2023-01-22 08:58:25.280399: step: 16/77, loss: 0.04055573791265488
2023-01-22 08:58:26.578474: step: 20/77, loss: 0.011438457295298576
2023-01-22 08:58:27.828592: step: 24/77, loss: 0.02927778847515583
2023-01-22 08:58:29.031287: step: 28/77, loss: 0.00024149783712346107
2023-01-22 08:58:30.324969: step: 32/77, loss: 3.0069857075432083e-06
2023-01-22 08:58:31.612875: step: 36/77, loss: 0.0003100955509580672
2023-01-22 08:58:32.891123: step: 40/77, loss: 0.031127629801630974
2023-01-22 08:58:34.177321: step: 44/77, loss: 0.00035606580786406994
2023-01-22 08:58:35.488833: step: 48/77, loss: 0.0004578246735036373
2023-01-22 08:58:36.744559: step: 52/77, loss: 0.005972975865006447
2023-01-22 08:58:38.057251: step: 56/77, loss: 0.0008780988864600658
2023-01-22 08:58:39.310776: step: 60/77, loss: 0.0060501242987811565
2023-01-22 08:58:40.613558: step: 64/77, loss: 1.1174272003700025e-05
2023-01-22 08:58:41.906165: step: 68/77, loss: 0.000842059263959527
2023-01-22 08:58:43.147295: step: 72/77, loss: 5.181662800168851e-06
2023-01-22 08:58:44.412817: step: 76/77, loss: 3.5017495747524663e-07
2023-01-22 08:58:45.714597: step: 80/77, loss: 0.00163843494374305
2023-01-22 08:58:46.995084: step: 84/77, loss: 1.0385938367107883e-06
2023-01-22 08:58:48.275197: step: 88/77, loss: 0.0030117020942270756
2023-01-22 08:58:49.539253: step: 92/77, loss: 7.071460004226537e-06
2023-01-22 08:58:50.856781: step: 96/77, loss: 0.00018569506937637925
2023-01-22 08:58:52.138013: step: 100/77, loss: 5.652254913002253e-06
2023-01-22 08:58:53.416669: step: 104/77, loss: 0.00032322845072485507
2023-01-22 08:58:54.689596: step: 108/77, loss: 0.000852587225381285
2023-01-22 08:58:55.974587: step: 112/77, loss: 2.957681135740131e-05
2023-01-22 08:58:57.244411: step: 116/77, loss: 7.405157703033183e-06
2023-01-22 08:58:58.541869: step: 120/77, loss: 1.4922447917342652e-05
2023-01-22 08:58:59.809645: step: 124/77, loss: 0.02082359604537487
2023-01-22 08:59:01.106057: step: 128/77, loss: 6.705517563432295e-08
2023-01-22 08:59:02.354496: step: 132/77, loss: 0.00026560970582067966
2023-01-22 08:59:03.634285: step: 136/77, loss: 1.1324837601023319e-07
2023-01-22 08:59:04.975011: step: 140/77, loss: 0.0005567181506194174
2023-01-22 08:59:06.187357: step: 144/77, loss: 0.0011684320634230971
2023-01-22 08:59:07.508048: step: 148/77, loss: 0.0008083868306130171
2023-01-22 08:59:08.776602: step: 152/77, loss: 0.00045696506276726723
2023-01-22 08:59:10.097371: step: 156/77, loss: 2.1207903046160936e-05
2023-01-22 08:59:11.371514: step: 160/77, loss: 0.0016548774437978864
2023-01-22 08:59:12.635970: step: 164/77, loss: 3.3676190014375607e-07
2023-01-22 08:59:13.943491: step: 168/77, loss: 1.5362414842456928e-06
2023-01-22 08:59:15.203117: step: 172/77, loss: 3.1292412216998855e-08
2023-01-22 08:59:16.471060: step: 176/77, loss: 0.0032324367202818394
2023-01-22 08:59:17.766199: step: 180/77, loss: 1.1079687283199746e-05
2023-01-22 08:59:19.028352: step: 184/77, loss: 0.0
2023-01-22 08:59:20.277450: step: 188/77, loss: 0.004545817617326975
2023-01-22 08:59:21.536519: step: 192/77, loss: 0.0003192027797922492
2023-01-22 08:59:22.860671: step: 196/77, loss: 0.012223941273987293
2023-01-22 08:59:24.161395: step: 200/77, loss: 4.3958004880551016e-07
2023-01-22 08:59:25.478526: step: 204/77, loss: 0.0162535160779953
2023-01-22 08:59:26.742589: step: 208/77, loss: 1.4048951925360598e-05
2023-01-22 08:59:28.014110: step: 212/77, loss: 3.874299991935004e-08
2023-01-22 08:59:29.314213: step: 216/77, loss: 5.006731953471899e-07
2023-01-22 08:59:30.624815: step: 220/77, loss: 0.009009288623929024
2023-01-22 08:59:31.903062: step: 224/77, loss: 2.1173013010411523e-05
2023-01-22 08:59:33.233917: step: 228/77, loss: 1.7163351003546268e-05
2023-01-22 08:59:34.506285: step: 232/77, loss: 0.00011633327085291967
2023-01-22 08:59:35.789017: step: 236/77, loss: 8.594244718551636e-05
2023-01-22 08:59:37.075680: step: 240/77, loss: 9.750283788889647e-05
2023-01-22 08:59:38.352202: step: 244/77, loss: 1.0430809105343997e-08
2023-01-22 08:59:39.679480: step: 248/77, loss: 1.50942094023776e-06
2023-01-22 08:59:40.972723: step: 252/77, loss: 3.769965246647189e-07
2023-01-22 08:59:42.276848: step: 256/77, loss: 4.2295978346373886e-05
2023-01-22 08:59:43.531152: step: 260/77, loss: 7.710335921728984e-06
2023-01-22 08:59:44.768537: step: 264/77, loss: 7.4505792646561986e-09
2023-01-22 08:59:46.051151: step: 268/77, loss: 5.066387842589393e-08
2023-01-22 08:59:47.341569: step: 272/77, loss: 2.1589827156276442e-05
2023-01-22 08:59:48.623923: step: 276/77, loss: 0.0001471538416808471
2023-01-22 08:59:49.947506: step: 280/77, loss: 3.8839229091536254e-05
2023-01-22 08:59:51.221075: step: 284/77, loss: 9.147621312877163e-06
2023-01-22 08:59:52.450044: step: 288/77, loss: 1.2099409332222422e-06
2023-01-22 08:59:53.732721: step: 292/77, loss: 0.00029746367363259196
2023-01-22 08:59:55.002283: step: 296/77, loss: 5.811446612824511e-08
2023-01-22 08:59:56.315007: step: 300/77, loss: 4.01707711716881e-06
2023-01-22 08:59:57.588143: step: 304/77, loss: 2.0085465166630456e-06
2023-01-22 08:59:58.896176: step: 308/77, loss: 8.670324859849643e-06
2023-01-22 09:00:00.200256: step: 312/77, loss: 0.016092410311102867
2023-01-22 09:00:01.473966: step: 316/77, loss: 1.2665944382206362e-07
2023-01-22 09:00:02.722505: step: 320/77, loss: 6.273311328186537e-07
2023-01-22 09:00:03.993189: step: 324/77, loss: 0.0018693513702601194
2023-01-22 09:00:05.294873: step: 328/77, loss: 2.8741076221194817e-06
2023-01-22 09:00:06.549116: step: 332/77, loss: 0.018234344199299812
2023-01-22 09:00:07.835212: step: 336/77, loss: 8.404001619055634e-07
2023-01-22 09:00:09.154735: step: 340/77, loss: 7.748588615186236e-08
2023-01-22 09:00:10.450142: step: 344/77, loss: 5.752379365731031e-06
2023-01-22 09:00:11.766341: step: 348/77, loss: 3.501755827528541e-07
2023-01-22 09:00:13.026713: step: 352/77, loss: 0.007864338345825672
2023-01-22 09:00:14.270018: step: 356/77, loss: 0.0035333663690835238
2023-01-22 09:00:15.547413: step: 360/77, loss: 9.834761272031756e-08
2023-01-22 09:00:16.855585: step: 364/77, loss: 1.4901159417490817e-08
2023-01-22 09:00:18.135318: step: 368/77, loss: 0.00010256327368551865
2023-01-22 09:00:19.423445: step: 372/77, loss: 6.437193178499001e-07
2023-01-22 09:00:20.753987: step: 376/77, loss: 4.33922978118062e-05
2023-01-22 09:00:22.063461: step: 380/77, loss: 9.536726963688125e-08
2023-01-22 09:00:23.328583: step: 384/77, loss: 2.984027196362149e-05
2023-01-22 09:00:24.616209: step: 388/77, loss: 1.1920904796625109e-07
==================================================
Loss: 0.003
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 26}
Test Chinese: {'template': {'p': 0.8928571428571429, 'r': 0.5905511811023622, 'f1': 0.7109004739336493}, 'slot': {'p': 0.5652173913043478, 'r': 0.02249134948096886, 'f1': 0.04326123128119801}, 'combined': 0.03075442982075688, 'epoch': 26}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 26}
Test Korean: {'template': {'p': 0.9036144578313253, 'r': 0.5905511811023622, 'f1': 0.7142857142857142}, 'slot': {'p': 0.5909090909090909, 'r': 0.02249134948096886, 'f1': 0.043333333333333335}, 'combined': 0.03095238095238095, 'epoch': 26}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 26}
Test Russian: {'template': {'p': 0.9036144578313253, 'r': 0.5905511811023622, 'f1': 0.7142857142857142}, 'slot': {'p': 0.6046511627906976, 'r': 0.02249134948096886, 'f1': 0.043369474562135114}, 'combined': 0.03097819611581079, 'epoch': 26}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 26}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 26}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2}
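Note: the "Current best result" block has stayed frozen at epoch 2 for every language, even though several later epochs tie its dev 'combined' score exactly. That is consistent with a strictly-greater comparison when updating the best checkpoint; a sketch of such bookkeeping (the names are illustrative, not from train.py):

    best = {}  # language -> {'dev': ..., 'test': ..., 'sample': ...}

    def update_best(lang, dev, test, sample):
        # Strictly greater: ties keep the earlier epoch, which is why
        # epoch 2 persists above even when a later epoch matches its score.
        if lang not in best or dev["combined"] > best[lang]["dev"]["combined"]:
            best[lang] = {"dev": dev, "test": test, "sample": sample}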
******************************
Epoch: 27
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-22 09:02:04.589512: step: 4/77, loss: 0.0009632143774069846
2023-01-22 09:02:05.842115: step: 8/77, loss: 9.709888399811462e-05
2023-01-22 09:02:07.123580: step: 12/77, loss: 7.260393886099337e-06
2023-01-22 09:02:08.348865: step: 16/77, loss: 2.3245681290973152e-07
2023-01-22 09:02:09.626441: step: 20/77, loss: 9.16340468393173e-06
2023-01-22 09:02:10.873345: step: 24/77, loss: 1.0147099601454102e-05
2023-01-22 09:02:12.203032: step: 28/77, loss: 0.005319915246218443
2023-01-22 09:02:13.464817: step: 32/77, loss: 1.7333022697130218e-05
2023-01-22 09:02:14.731784: step: 36/77, loss: 0.07323425263166428
2023-01-22 09:02:15.988523: step: 40/77, loss: 0.01170014776289463
2023-01-22 09:02:17.271209: step: 44/77, loss: 4.917379925473142e-08
2023-01-22 09:02:18.542780: step: 48/77, loss: 0.00034738657996058464
2023-01-22 09:02:19.773259: step: 52/77, loss: 1.9967454534253193e-07
2023-01-22 09:02:21.063114: step: 56/77, loss: 1.3398454029811546e-05
2023-01-22 09:02:22.324566: step: 60/77, loss: 0.012574520893394947
2023-01-22 09:02:23.637229: step: 64/77, loss: 8.59444207890192e-06
2023-01-22 09:02:24.961593: step: 68/77, loss: 8.180574582183908e-07
2023-01-22 09:02:26.245453: step: 72/77, loss: 5.900776045564271e-07
2023-01-22 09:02:27.525363: step: 76/77, loss: 1.0342812856833916e-05
2023-01-22 09:02:28.775275: step: 80/77, loss: 0.004377368837594986
2023-01-22 09:02:30.075930: step: 84/77, loss: 0.00016269163461402059
2023-01-22 09:02:31.383716: step: 88/77, loss: 2.8387847123667598e-05
2023-01-22 09:02:32.664135: step: 92/77, loss: 0.0003090954851359129
2023-01-22 09:02:33.945444: step: 96/77, loss: 4.673906005336903e-06
2023-01-22 09:02:35.197801: step: 100/77, loss: 0.00022745260503143072
2023-01-22 09:02:36.505027: step: 104/77, loss: 3.643586751422845e-05
2023-01-22 09:02:37.775796: step: 108/77, loss: 1.0860110705834813e-05
2023-01-22 09:02:39.113290: step: 112/77, loss: 0.007070006802678108
2023-01-22 09:02:40.384566: step: 116/77, loss: 0.017348483204841614
2023-01-22 09:02:41.670777: step: 120/77, loss: 4.704351886175573e-05
2023-01-22 09:02:42.991136: step: 124/77, loss: 0.018814612179994583
2023-01-22 09:02:44.281501: step: 128/77, loss: 0.0023173014633357525
2023-01-22 09:02:45.570041: step: 132/77, loss: 4.932226147502661e-07
2023-01-22 09:02:46.864243: step: 136/77, loss: 0.00010033223225036636
2023-01-22 09:02:48.171598: step: 140/77, loss: 3.6354311305331066e-05
2023-01-22 09:02:49.471110: step: 144/77, loss: 0.00012139079626649618
2023-01-22 09:02:50.780711: step: 148/77, loss: 2.742990545812063e-05
2023-01-22 09:02:52.089015: step: 152/77, loss: 0.025909584015607834
2023-01-22 09:02:53.413510: step: 156/77, loss: 1.2906899428344332e-05
2023-01-22 09:02:54.728470: step: 160/77, loss: 0.018763171508908272
2023-01-22 09:02:56.029369: step: 164/77, loss: 0.004122719168663025
2023-01-22 09:02:57.291170: step: 168/77, loss: 0.0005570485373027623
2023-01-22 09:02:58.569058: step: 172/77, loss: 0.013160121627151966
2023-01-22 09:02:59.804988: step: 176/77, loss: 0.00028207077411934733
2023-01-22 09:03:01.115285: step: 180/77, loss: 0.013939131051301956
2023-01-22 09:03:02.412106: step: 184/77, loss: 0.013419135473668575
2023-01-22 09:03:03.713520: step: 188/77, loss: 2.6850261747313198e-06
2023-01-22 09:03:04.982884: step: 192/77, loss: 3.027114325959701e-05
2023-01-22 09:03:06.295601: step: 196/77, loss: 2.2202648608526943e-07
2023-01-22 09:03:07.593838: step: 200/77, loss: 1.520577643532306e-05
2023-01-22 09:03:08.860268: step: 204/77, loss: 5.405565389082767e-06
2023-01-22 09:03:10.162415: step: 208/77, loss: 6.854527612176753e-08
2023-01-22 09:03:11.448661: step: 212/77, loss: 4.2942242544086184e-06
2023-01-22 09:03:12.718288: step: 216/77, loss: 7.763283633721585e-07
2023-01-22 09:03:14.005095: step: 220/77, loss: 4.440516079284862e-07
2023-01-22 09:03:15.260274: step: 224/77, loss: 1.5358633390860632e-05
2023-01-22 09:03:16.535765: step: 228/77, loss: 5.9604580826544407e-08
2023-01-22 09:03:17.815286: step: 232/77, loss: 0.000686664308886975
2023-01-22 09:03:19.076315: step: 236/77, loss: 0.033505942672491074
2023-01-22 09:03:20.403299: step: 240/77, loss: 1.3873767784389202e-05
2023-01-22 09:03:21.711182: step: 244/77, loss: 8.046392849792028e-07
2023-01-22 09:03:23.047987: step: 248/77, loss: 6.70079534756951e-05
2023-01-22 09:03:24.328685: step: 252/77, loss: 0.0006942515028640628
2023-01-22 09:03:25.567381: step: 256/77, loss: 0.0001692011283012107
2023-01-22 09:03:26.870599: step: 260/77, loss: 0.001478560152463615
2023-01-22 09:03:28.141351: step: 264/77, loss: 8.87875648913905e-05
2023-01-22 09:03:29.448886: step: 268/77, loss: 0.009863415732979774
2023-01-22 09:03:30.738536: step: 272/77, loss: 2.6446809897606727e-06
2023-01-22 09:03:32.030891: step: 276/77, loss: 5.823625087941764e-06
2023-01-22 09:03:33.289135: step: 280/77, loss: 0.007472109980881214
2023-01-22 09:03:34.549300: step: 284/77, loss: 0.000493723142426461
2023-01-22 09:03:35.869513: step: 288/77, loss: 3.874297149764061e-08
2023-01-22 09:03:37.170986: step: 292/77, loss: 1.3560013201185939e-07
2023-01-22 09:03:38.419827: step: 296/77, loss: 0.0008401316008530557
2023-01-22 09:03:39.712543: step: 300/77, loss: 0.0001201034028781578
2023-01-22 09:03:40.994436: step: 304/77, loss: 0.0036236299201846123
2023-01-22 09:03:42.309397: step: 308/77, loss: 9.447152820030169e-07
2023-01-22 09:03:43.594976: step: 312/77, loss: 1.7672722606221214e-05
2023-01-22 09:03:44.842608: step: 316/77, loss: 1.4962723071221262e-05
2023-01-22 09:03:46.170647: step: 320/77, loss: 3.774357173824683e-05
2023-01-22 09:03:47.460527: step: 324/77, loss: 6.529239180963486e-05
2023-01-22 09:03:48.728795: step: 328/77, loss: 3.069627894092264e-07
2023-01-22 09:03:50.043198: step: 332/77, loss: 0.01833205111324787
2023-01-22 09:03:51.300287: step: 336/77, loss: 0.0007400041213259101
2023-01-22 09:03:52.589396: step: 340/77, loss: 0.00012709743168670684
2023-01-22 09:03:53.864816: step: 344/77, loss: 2.483945536368992e-06
2023-01-22 09:03:55.160466: step: 348/77, loss: 3.844440925604431e-07
2023-01-22 09:03:56.493079: step: 352/77, loss: 0.0030930594075471163
2023-01-22 09:03:57.737804: step: 356/77, loss: 0.0015991883119568229
2023-01-22 09:03:59.056087: step: 360/77, loss: 1.031789361150004e-05
2023-01-22 09:04:00.358242: step: 364/77, loss: 1.4349502635013778e-06
2023-01-22 09:04:01.630787: step: 368/77, loss: 2.8228296287124977e-05
2023-01-22 09:04:02.933459: step: 372/77, loss: 7.927232559268305e-07
2023-01-22 09:04:04.216407: step: 376/77, loss: 0.0005354603636078537
2023-01-22 09:04:05.476262: step: 380/77, loss: 0.00016630259051453322
2023-01-22 09:04:06.743433: step: 384/77, loss: 0.017812933772802353
2023-01-22 09:04:08.007653: step: 388/77, loss: 1.934991087182425e-05
==================================================
Loss: 0.004
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27}
Test Chinese: {'template': {'p': 0.9358974358974359, 'r': 0.5748031496062992, 'f1': 0.7121951219512195}, 'slot': {'p': 0.5897435897435898, 'r': 0.019896193771626297, 'f1': 0.03849372384937238}, 'combined': 0.027415042351260327, 'epoch': 27}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27}
Test Korean: {'template': {'p': 0.9230769230769231, 'r': 0.5669291338582677, 'f1': 0.7024390243902439}, 'slot': {'p': 0.5897435897435898, 'r': 0.019896193771626297, 'f1': 0.03849372384937238}, 'combined': 0.027039493825900596, 'epoch': 27}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 27}
Test Russian: {'template': {'p': 0.9358974358974359, 'r': 0.5748031496062992, 'f1': 0.7121951219512195}, 'slot': {'p': 0.5897435897435898, 'r': 0.019896193771626297, 'f1': 0.03849372384937238}, 'combined': 0.027415042351260327, 'epoch': 27}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 27}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 27}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2}
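Note: to see the plateau in one place, the evaluation dicts can be pulled out of the log and tabulated; the test 'combined' score has hovered around 0.02-0.03 since epoch 2. A sketch using a regex plus ast.literal_eval (one convenient approach; nothing here is part of the pipeline):

    import ast
    import re

    TEST_RE = re.compile(r"Test Chinese: (\{.*?'epoch': \d+\})")

    def chinese_test_combined(log_text):
        # Yields (epoch, combined) for every "Test Chinese" evaluation block.
        for m in TEST_RE.finditer(log_text):
            scores = ast.literal_eval(m.group(1))
            yield scores["epoch"], scores["combined"]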
******************************
Epoch: 28
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-22 09:05:50.648334: step: 4/77, loss: 0.0021865300368517637
2023-01-22 09:05:51.961566: step: 8/77, loss: 0.00019810137746389955
2023-01-22 09:05:53.223303: step: 12/77, loss: 0.004027987364679575
2023-01-22 09:05:54.482867: step: 16/77, loss: 3.8547568692592904e-05
2023-01-22 09:05:55.747287: step: 20/77, loss: 0.00019763948512263596
2023-01-22 09:05:56.996899: step: 24/77, loss: 0.015152830630540848
2023-01-22 09:05:58.237018: step: 28/77, loss: 0.00030233588768169284
2023-01-22 09:05:59.521688: step: 32/77, loss: 0.0005768241826444864
2023-01-22 09:06:00.815359: step: 36/77, loss: 1.628660584174213e-06
2023-01-22 09:06:02.053325: step: 40/77, loss: 6.025625407346524e-05
2023-01-22 09:06:03.319914: step: 44/77, loss: 8.51103959575994e-06
2023-01-22 09:06:04.616858: step: 48/77, loss: 1.4007048321218463e-07
2023-01-22 09:06:05.923261: step: 52/77, loss: 2.3750601030769758e-05
2023-01-22 09:06:07.243750: step: 56/77, loss: 0.002472936175763607
2023-01-22 09:06:08.523935: step: 60/77, loss: 0.00030853349016979337
2023-01-22 09:06:09.805931: step: 64/77, loss: 2.0950446923961863e-06
2023-01-22 09:06:11.103764: step: 68/77, loss: 2.771506615317776e-06
2023-01-22 09:06:12.390450: step: 72/77, loss: 4.5486776798497885e-06
2023-01-22 09:06:13.657773: step: 76/77, loss: 4.2551237129373476e-05
2023-01-22 09:06:14.926462: step: 80/77, loss: 0.00015797361265867949
2023-01-22 09:06:16.225597: step: 84/77, loss: 6.729036977048963e-05
2023-01-22 09:06:17.558914: step: 88/77, loss: 0.0012200362980365753
2023-01-22 09:06:18.850517: step: 92/77, loss: 1.1205382861589896e-06
2023-01-22 09:06:20.178486: step: 96/77, loss: 0.0009553980198688805
2023-01-22 09:06:21.469837: step: 100/77, loss: 3.157766695949249e-05
2023-01-22 09:06:22.743872: step: 104/77, loss: 2.947140046671848e-06
2023-01-22 09:06:24.075534: step: 108/77, loss: 0.0001485990360379219
2023-01-22 09:06:25.356467: step: 112/77, loss: 3.3513078960822895e-05
2023-01-22 09:06:26.629786: step: 116/77, loss: 0.001428778632543981
2023-01-22 09:06:27.942037: step: 120/77, loss: 6.617177405132679e-06
2023-01-22 09:06:29.180856: step: 124/77, loss: 3.6655364965554327e-06
2023-01-22 09:06:30.441131: step: 128/77, loss: 4.7577421355526894e-05
2023-01-22 09:06:31.680998: step: 132/77, loss: 0.005376014858484268
2023-01-22 09:06:32.999634: step: 136/77, loss: 0.03443664684891701
2023-01-22 09:06:34.297362: step: 140/77, loss: 0.00010895886225625873
2023-01-22 09:06:35.607687: step: 144/77, loss: 5.1116644499416e-06
2023-01-22 09:06:36.905793: step: 148/77, loss: 0.012373952195048332
2023-01-22 09:06:38.175661: step: 152/77, loss: 0.0011482408735901117
2023-01-22 09:06:39.467357: step: 156/77, loss: 0.0007012872956693172
2023-01-22 09:06:40.758485: step: 160/77, loss: 8.746685011828959e-07
2023-01-22 09:06:42.015013: step: 164/77, loss: 9.193700520881976e-07
2023-01-22 09:06:43.306200: step: 168/77, loss: 2.2305810034595197e-06
2023-01-22 09:06:44.564605: step: 172/77, loss: 0.001875612186267972
2023-01-22 09:06:45.881059: step: 176/77, loss: 6.929267783561954e-06
2023-01-22 09:06:47.141232: step: 180/77, loss: 8.188005449483171e-05
2023-01-22 09:06:48.404645: step: 184/77, loss: 2.8312191702184464e-08
2023-01-22 09:06:49.744169: step: 188/77, loss: 0.0007012022542767227
2023-01-22 09:06:51.030384: step: 192/77, loss: 4.4859907575300895e-06
2023-01-22 09:06:52.293846: step: 196/77, loss: 0.011081600561738014
2023-01-22 09:06:53.638760: step: 200/77, loss: 0.0005675565917044878
2023-01-22 09:06:54.904415: step: 204/77, loss: 0.0019754134118556976
2023-01-22 09:06:56.194067: step: 208/77, loss: 0.0015090866945683956
2023-01-22 09:06:57.501545: step: 212/77, loss: 2.6964096832671203e-05
2023-01-22 09:06:58.768611: step: 216/77, loss: 4.87226225232007e-06
2023-01-22 09:07:00.039444: step: 220/77, loss: 4.604378318617819e-07
2023-01-22 09:07:01.332627: step: 224/77, loss: 2.056343504364122e-07
2023-01-22 09:07:02.585612: step: 228/77, loss: 3.175208257744089e-05
2023-01-22 09:07:03.870674: step: 232/77, loss: 0.0016991746379062533
2023-01-22 09:07:05.140896: step: 236/77, loss: 1.1633360372798052e-05
2023-01-22 09:07:06.445854: step: 240/77, loss: 1.2814975036690157e-07
2023-01-22 09:07:07.713011: step: 244/77, loss: 1.141123993875226e-05
2023-01-22 09:07:08.982446: step: 248/77, loss: 0.0002870917378459126
2023-01-22 09:07:10.280265: step: 252/77, loss: 0.004951559007167816
2023-01-22 09:07:11.555297: step: 256/77, loss: 7.301561311123805e-08
2023-01-22 09:07:12.869873: step: 260/77, loss: 5.036663424107246e-05
2023-01-22 09:07:14.168128: step: 264/77, loss: 0.0
2023-01-22 09:07:15.459744: step: 268/77, loss: 9.910161679727025e-06
2023-01-22 09:07:16.742805: step: 272/77, loss: 0.00963597558438778
2023-01-22 09:07:18.054846: step: 276/77, loss: 0.00782719161361456
2023-01-22 09:07:19.341567: step: 280/77, loss: 8.329643037541246e-07
2023-01-22 09:07:20.639219: step: 284/77, loss: 0.00023496760695707053
2023-01-22 09:07:21.888957: step: 288/77, loss: 0.0019654352217912674
2023-01-22 09:07:23.199262: step: 292/77, loss: 5.9508392951102e-06
2023-01-22 09:07:24.485874: step: 296/77, loss: 1.2933653579239035e-06
2023-01-22 09:07:25.798805: step: 300/77, loss: 1.749242983350996e-05
2023-01-22 09:07:27.056188: step: 304/77, loss: 0.0001417692838003859
2023-01-22 09:07:28.337227: step: 308/77, loss: 0.0005447689909487963
2023-01-22 09:07:29.635726: step: 312/77, loss: 0.0003403635055292398
2023-01-22 09:07:30.902804: step: 316/77, loss: 1.9222362368509494e-07
2023-01-22 09:07:32.187781: step: 320/77, loss: 3.882642431562999e-06
2023-01-22 09:07:33.494437: step: 324/77, loss: 0.001855060108937323
2023-01-22 09:07:34.738618: step: 328/77, loss: 0.00011588455527089536
2023-01-22 09:07:36.046801: step: 332/77, loss: 2.5062854547286406e-05
2023-01-22 09:07:37.334400: step: 336/77, loss: 4.14246784430361e-07
2023-01-22 09:07:38.679327: step: 340/77, loss: 1.247184968633519e-06
2023-01-22 09:07:39.978195: step: 344/77, loss: 0.00025223763077519834
2023-01-22 09:07:41.235474: step: 348/77, loss: 8.014580089366063e-05
2023-01-22 09:07:42.550172: step: 352/77, loss: 1.4408777815333451e-06
2023-01-22 09:07:43.860809: step: 356/77, loss: 0.0001089065262931399
2023-01-22 09:07:45.125126: step: 360/77, loss: 0.000887332484126091
2023-01-22 09:07:46.461608: step: 364/77, loss: 2.2855980205349624e-05
2023-01-22 09:07:47.740713: step: 368/77, loss: 0.00034089345717802644
2023-01-22 09:07:49.030669: step: 372/77, loss: 0.000578397186473012
2023-01-22 09:07:50.301787: step: 376/77, loss: 0.012201877310872078
2023-01-22 09:07:51.635640: step: 380/77, loss: 0.0033788299188017845
2023-01-22 09:07:52.917890: step: 384/77, loss: 0.0006222074152901769
2023-01-22 09:07:54.201494: step: 388/77, loss: 7.241784487632685e-07
==================================================
Loss: 0.002
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28}
Test Chinese: {'template': {'p': 0.9342105263157895, 'r': 0.5590551181102362, 'f1': 0.6995073891625616}, 'slot': {'p': 0.5434782608695652, 'r': 0.02162629757785467, 'f1': 0.041597337770382686}, 'combined': 0.029097645139873604, 'epoch': 28}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28}
Test Korean: {'template': {'p': 0.9324324324324325, 'r': 0.5433070866141733, 'f1': 0.6865671641791046}, 'slot': {'p': 0.5434782608695652, 'r': 0.02162629757785467, 'f1': 0.041597337770382686}, 'combined': 0.028559366230411998, 'epoch': 28}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28}
Test Russian: {'template': {'p': 0.9324324324324325, 'r': 0.5433070866141733, 'f1': 0.6865671641791046}, 'slot': {'p': 0.5319148936170213, 'r': 0.02162629757785467, 'f1': 0.04156275976724854}, 'combined': 0.02853562610885721, 'epoch': 28}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 28}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 28}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2}
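Note: the reported precisions and recalls are consistent with small integer counts, which can be recovered heuristically from the floats. For example, epoch 28's test slot scores for Chinese, p = 0.5434782608695652 and r = 0.02162629757785467, reduce to 25/46 and 25/1156, i.e. 25 correct slots out of 46 predicted against 1156 gold. A sketch of that back-calculation (the recovered counts are an inference, not logged values):

    from fractions import Fraction

    def recover_counts(p, r, max_denominator=2000):
        # Heuristic: snap the floats to small fractions; assumes p and r
        # share the same true-positive numerator once reduced.
        fp = Fraction(p).limit_denominator(max_denominator)  # tp / predicted
        fr = Fraction(r).limit_denominator(max_denominator)  # tp / gold
        return fp.numerator, fp.denominator, fr.denominator

    tp, predicted, gold = recover_counts(0.5434782608695652, 0.02162629757785467)
    # -> (25, 46, 1156)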
0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2} ****************************** Epoch: 29 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:09:34.013910: step: 4/77, loss: 8.651711141283158e-06 2023-01-22 09:09:35.345508: step: 8/77, loss: 1.0613646736601368e-05 2023-01-22 09:09:36.630727: step: 12/77, loss: 1.1698984053509776e-05 2023-01-22 09:09:37.929857: step: 16/77, loss: 6.8453396124823485e-06 2023-01-22 09:09:39.238994: step: 20/77, loss: 0.058387961238622665 2023-01-22 09:09:40.514457: step: 24/77, loss: 2.6822082332955688e-08 2023-01-22 09:09:41.744748: step: 28/77, loss: 2.282654349983204e-06 2023-01-22 09:09:43.032040: step: 32/77, loss: 0.004614799749106169 2023-01-22 09:09:44.323785: step: 36/77, loss: 6.8609083427872974e-06 2023-01-22 09:09:45.576910: step: 40/77, loss: 0.0008533053332939744 2023-01-22 09:09:46.869843: step: 44/77, loss: 0.0052756210789084435 2023-01-22 09:09:48.148382: step: 48/77, loss: 3.4868492093664827e-07 2023-01-22 09:09:49.430556: step: 52/77, loss: 3.491688403300941e-05 2023-01-22 09:09:50.720544: step: 56/77, loss: 0.006140722893178463 2023-01-22 09:09:51.985955: step: 60/77, loss: 3.100781759712845e-05 2023-01-22 09:09:53.274199: step: 64/77, loss: 3.0994272037787596e-07 2023-01-22 09:09:54.517870: step: 68/77, loss: 0.0008896092767827213 2023-01-22 09:09:55.825862: step: 72/77, loss: 0.009300212375819683 2023-01-22 09:09:57.101714: step: 76/77, loss: 0.0004132104222662747 2023-01-22 09:09:58.398761: step: 80/77, loss: 6.638202648900915e-06 2023-01-22 09:09:59.659932: step: 84/77, loss: 0.0011040932731702924 2023-01-22 09:10:00.959931: step: 88/77, loss: 1.318868908128934e-05 2023-01-22 09:10:02.259884: step: 92/77, loss: 4.4553686961990024e-07 2023-01-22 09:10:03.534542: step: 96/77, loss: 2.3648008209420368e-05 2023-01-22 09:10:04.863099: step: 100/77, loss: 0.00017289724200963974 2023-01-22 09:10:06.163846: step: 104/77, loss: 0.0002529393823351711 2023-01-22 09:10:07.464356: step: 108/77, loss: 9.019220669870265e-06 2023-01-22 09:10:08.730130: step: 112/77, loss: 5.823463652632199e-05 2023-01-22 09:10:09.987064: step: 116/77, loss: 9.745178886078065e-07 2023-01-22 09:10:11.247911: step: 120/77, loss: 4.057395926793106e-06 2023-01-22 09:10:12.531342: step: 124/77, loss: 0.0417637974023819 2023-01-22 09:10:13.820487: step: 128/77, loss: 4.908620030619204e-05 2023-01-22 09:10:15.119831: step: 132/77, loss: 2.5956498575396836e-05 2023-01-22 09:10:16.392490: step: 136/77, loss: 0.0 2023-01-22 09:10:17.672039: step: 140/77, loss: 1.0787980500026606e-06 2023-01-22 09:10:18.940648: step: 144/77, loss: 7.003539082006682e-08 2023-01-22 09:10:20.252250: step: 148/77, loss: 
2.9099076073180186e-06 2023-01-22 09:10:21.561637: step: 152/77, loss: 7.658990739400906e-07 2023-01-22 09:10:22.934283: step: 156/77, loss: 0.0002760235802270472 2023-01-22 09:10:24.231131: step: 160/77, loss: 2.2328616978484206e-05 2023-01-22 09:10:25.524876: step: 164/77, loss: 1.5079613149282522e-06 2023-01-22 09:10:26.791838: step: 168/77, loss: 7.09287860445329e-07 2023-01-22 09:10:28.079404: step: 172/77, loss: 6.417190888896585e-05 2023-01-22 09:10:29.391698: step: 176/77, loss: 1.533610702608712e-05 2023-01-22 09:10:30.698997: step: 180/77, loss: 0.00015153044660110027 2023-01-22 09:10:31.963387: step: 184/77, loss: 0.00015654291200917214 2023-01-22 09:10:33.257806: step: 188/77, loss: 2.3394709103286004e-07 2023-01-22 09:10:34.512285: step: 192/77, loss: 0.0001352376857539639 2023-01-22 09:10:35.822453: step: 196/77, loss: 3.8812271668575704e-05 2023-01-22 09:10:37.087706: step: 200/77, loss: 0.41700997948646545 2023-01-22 09:10:38.371450: step: 204/77, loss: 9.223648476108792e-07 2023-01-22 09:10:39.655016: step: 208/77, loss: 0.0008059104438871145 2023-01-22 09:10:40.960580: step: 212/77, loss: 4.970795998815447e-05 2023-01-22 09:10:42.204373: step: 216/77, loss: 0.0023068408481776714 2023-01-22 09:10:43.506425: step: 220/77, loss: 0.011769354343414307 2023-01-22 09:10:44.792633: step: 224/77, loss: 1.411839184584096e-05 2023-01-22 09:10:46.043573: step: 228/77, loss: 0.0033866390585899353 2023-01-22 09:10:47.327493: step: 232/77, loss: 0.0005937239620834589 2023-01-22 09:10:48.637101: step: 236/77, loss: 1.1905756309715798e-06 2023-01-22 09:10:49.956572: step: 240/77, loss: 0.05240585654973984 2023-01-22 09:10:51.240904: step: 244/77, loss: 8.177459676517174e-05 2023-01-22 09:10:52.538115: step: 248/77, loss: 0.03990630805492401 2023-01-22 09:10:53.794145: step: 252/77, loss: 0.009225370362401009 2023-01-22 09:10:55.071085: step: 256/77, loss: 0.004749360494315624 2023-01-22 09:10:56.382065: step: 260/77, loss: 0.04941349849104881 2023-01-22 09:10:57.678451: step: 264/77, loss: 0.0037642589304596186 2023-01-22 09:10:58.927032: step: 268/77, loss: 5.3255724196787924e-05 2023-01-22 09:11:00.206265: step: 272/77, loss: 1.9163348042638972e-05 2023-01-22 09:11:01.496794: step: 276/77, loss: 2.8681495223281672e-06 2023-01-22 09:11:02.835163: step: 280/77, loss: 2.086154324842937e-07 2023-01-22 09:11:04.143563: step: 284/77, loss: 3.7699453514505876e-07 2023-01-22 09:11:05.410391: step: 288/77, loss: 2.2428501324611716e-05 2023-01-22 09:11:06.702221: step: 292/77, loss: 0.0074419789016246796 2023-01-22 09:11:07.981074: step: 296/77, loss: 3.7662855902453884e-05 2023-01-22 09:11:09.230184: step: 300/77, loss: 1.2514733498392161e-05 2023-01-22 09:11:10.494797: step: 304/77, loss: 0.0006708307191729546 2023-01-22 09:11:11.775011: step: 308/77, loss: 0.00016213061462622136 2023-01-22 09:11:13.056656: step: 312/77, loss: 0.017585409805178642 2023-01-22 09:11:14.331452: step: 316/77, loss: 0.0018693305319175124 2023-01-22 09:11:15.569008: step: 320/77, loss: 2.6822082332955688e-08 2023-01-22 09:11:16.872171: step: 324/77, loss: 2.0861621763401672e-08 2023-01-22 09:11:18.197746: step: 328/77, loss: 0.0006003840826451778 2023-01-22 09:11:19.491173: step: 332/77, loss: 0.0033928346820175648 2023-01-22 09:11:20.765136: step: 336/77, loss: 1.1474237908259965e-05 2023-01-22 09:11:22.034927: step: 340/77, loss: 3.406353062018752e-05 2023-01-22 09:11:23.319708: step: 344/77, loss: 1.2427079809640418e-06 2023-01-22 09:11:24.589557: step: 348/77, loss: 0.0001311218657065183 2023-01-22 
2023-01-22 09:11:25.863170: step: 352/77, loss: 0.0008393143652938306
2023-01-22 09:11:27.167055: step: 356/77, loss: 3.580274551495677e-06
2023-01-22 09:11:28.447645: step: 360/77, loss: 3.2870757422642782e-06
2023-01-22 09:11:29.711269: step: 364/77, loss: 3.6221701975591714e-06
2023-01-22 09:11:30.977413: step: 368/77, loss: 5.960463678178485e-09
2023-01-22 09:11:32.304162: step: 372/77, loss: 0.002523561008274555
2023-01-22 09:11:33.555736: step: 376/77, loss: 1.508878904132871e-05
2023-01-22 09:11:34.856891: step: 380/77, loss: 6.556501119803215e-08
2023-01-22 09:11:36.140947: step: 384/77, loss: 2.235172225084625e-08
2023-01-22 09:11:37.446235: step: 388/77, loss: 5.226740995567525e-06
==================================================
Loss: 0.008
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 29}
Test Chinese: {'template': {'p': 0.9146341463414634, 'r': 0.5905511811023622, 'f1': 0.7177033492822966}, 'slot': {'p': 0.5652173913043478, 'r': 0.02249134948096886, 'f1': 0.04326123128119801}, 'combined': 0.031048730584591868, 'epoch': 29}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 29}
Test Korean: {'template': {'p': 0.9146341463414634, 'r': 0.5905511811023622, 'f1': 0.7177033492822966}, 'slot': {'p': 0.5531914893617021, 'r': 0.02249134948096886, 'f1': 0.04322527015793849}, 'combined': 0.03102292116598456, 'epoch': 29}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 29}
Test Russian: {'template': {'p': 0.9146341463414634, 'r': 0.5905511811023622, 'f1': 0.7177033492822966}, 'slot': {'p': 0.5652173913043478, 'r': 0.02249134948096886, 'f1': 0.04326123128119801}, 'combined': 0.031048730584591868, 'epoch': 29}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 29}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 29}
==================================================
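Note on the numbers above: the per-epoch "Loss" line is consistent with a plain mean of the logged step losses, and the evaluation dicts follow the standard precision/recall/F1 scheme, with 'combined' matching the product of the template F1 and the slot F1 (e.g. for Dev Chinese at epoch 29: 0.7368421052631579 * 0.07029876977152899 = 0.05179909351586346). A minimal sketch under those assumptions; the helper names are hypothetical, not taken from train.py:

def f1(p, r):
    # Harmonic mean of precision and recall; defined as 0.0 when p + r == 0.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined(template_f1, slot_f1):
    # The logged 'combined' field equals template F1 times slot F1.
    return template_f1 * slot_f1

# Dev Chinese, epoch 29, values copied from the log above:
t = f1(1.0, 0.5833333333333334)    # -> 0.7368421052631579
s = f1(0.5, 0.03780718336483932)   # -> 0.07029876977152899
print(combined(t, s))              # -> 0.05179909351586346

# Epoch-level summary, presumably: epoch_loss = sum(step_losses) / len(step_losses)  # ~0.008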
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5511811023622047, 'f1': 0.7035175879396985}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02130078483248877, 'epoch': 2}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Korean: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5294117647058824, 'r': 0.015570934256055362, 'f1': 0.030252100840336135}, 'combined': 0.021478991596638655, 'epoch': 2}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5590551181102362, 'f1': 0.71}, 'slot': {'p': 0.5454545454545454, 'r': 0.015570934256055362, 'f1': 0.03027754415475189}, 'combined': 0.02149705634987384, 'epoch': 2}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 2}
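Note on "Current best result": it still reports epoch 2 at epoch 29 even though the dev 'combined' score at epoch 29 equals the epoch-2 value (0.05179909351586346), which suggests the best checkpoint is selected per language by dev 'combined' with a strict improvement required, and the matching test/sample scores are carried along from that epoch. A sketch of that selection logic, under that assumption only; the names are illustrative, not from train.py:

best = {}  # language -> {'dev': ..., 'test': ..., 'sample': ...}

def update_best(lang, dev, test, sample):
    # Keep whichever epoch has the highest dev 'combined' score.
    # Strict '>' means an earlier epoch wins ties, matching the
    # epoch-2 entries still reported above at epoch 29.
    prev = best.get(lang)
    if prev is None or dev['combined'] > prev['dev']['combined']:
        best[lang] = {'dev': dev, 'test': test, 'sample': sample}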