Command that produces this log: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 ---------------------------------------------------------------------------------------------------- > trainable params: >>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024]) >>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024]) >>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024]) >>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024]) >>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.1.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.1.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.1.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.1.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.1.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.2.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.2.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.2.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.2.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.2.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.3.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.3.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.3.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.3.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.3.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.4.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.4.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.4.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.4.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.4.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.5.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.5.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.5.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.5.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.5.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.5.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.6.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.6.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.6.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.6.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.6.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.7.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.7.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.7.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.7.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.7.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.8.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.8.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.8.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.8.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.8.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.8.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.9.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.9.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.9.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.9.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.9.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.10.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.10.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.10.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.10.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.10.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.11.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.dense.weight: torch.Size([1024, 1024]) >>> 
xlmr.encoder.layer.11.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.11.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.11.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.11.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.11.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.12.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.12.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.12.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.12.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.12.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.13.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.13.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.13.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.13.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.13.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.14.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.dense.weight: torch.Size([1024, 
1024]) >>> xlmr.encoder.layer.14.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.14.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.14.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.14.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.14.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.15.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.15.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.15.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.15.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.15.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.16.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.16.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.16.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.16.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.16.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.dense.weight: 
torch.Size([1024, 1024]) >>> xlmr.encoder.layer.17.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.17.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.17.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.17.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.17.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.18.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.18.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.18.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.18.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.18.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.19.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.19.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.19.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.19.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.19.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.self.value.bias: torch.Size([1024]) >>> 
xlmr.encoder.layer.20.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.20.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.20.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.20.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.20.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.20.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.21.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.21.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.21.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.21.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.21.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.self.value.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.22.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.22.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.22.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.22.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.22.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.query.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.query.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.key.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.key.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.self.value.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.self.value.bias: 
torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.dense.weight: torch.Size([1024, 1024]) >>> xlmr.encoder.layer.23.attention.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.attention.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.intermediate.dense.weight: torch.Size([4096, 1024]) >>> xlmr.encoder.layer.23.intermediate.dense.bias: torch.Size([4096]) >>> xlmr.encoder.layer.23.output.dense.weight: torch.Size([1024, 4096]) >>> xlmr.encoder.layer.23.output.dense.bias: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.weight: torch.Size([1024]) >>> xlmr.encoder.layer.23.output.LayerNorm.bias: torch.Size([1024]) >>> xlmr.pooler.dense.weight: torch.Size([1024, 1024]) >>> xlmr.pooler.dense.bias: torch.Size([1024]) >>> trans_rep.weight: torch.Size([1024, 2048]) >>> trans_rep.bias: torch.Size([1024]) >>> hidden_ffns.Corruplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Corruplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Cybercrimeplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Cybercrimeplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Disasterplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Disasterplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Displacementplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Displacementplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Epidemiplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Epidemiplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Etiplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Etiplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Protestplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Protestplate.layers.0.bias: torch.Size([768]) >>> hidden_ffns.Terrorplate.layers.0.weight: torch.Size([768, 1024]) >>> hidden_ffns.Terrorplate.layers.0.bias: torch.Size([768]) >>> template_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Etiplate.layers.0.weight: 
torch.Size([450, 768]) >>> template_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> template_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> template_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> template_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> template_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) >>> type_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Corruplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Corruplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Disasterplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Disasterplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Displacementplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Displacementplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Epidemiplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Epidemiplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Etiplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Etiplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Protestplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Protestplate.layers.1.bias: torch.Size([6]) >>> type_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> type_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> type_classifiers.Terrorplate.layers.1.weight: torch.Size([6, 450]) >>> type_classifiers.Terrorplate.layers.1.bias: torch.Size([6]) >>> completion_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Corruplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Corruplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> 
completion_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Disasterplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Disasterplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Displacementplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Displacementplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Epidemiplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Epidemiplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Etiplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Etiplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Protestplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Protestplate.layers.1.bias: torch.Size([3]) >>> completion_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> completion_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> completion_classifiers.Terrorplate.layers.1.weight: torch.Size([3, 450]) >>> completion_classifiers.Terrorplate.layers.1.bias: torch.Size([3]) >>> overtime_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> 
overtime_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> overtime_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> overtime_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> overtime_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> overtime_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Corruplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Corruplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Corruplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Corruplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Cybercrimeplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Cybercrimeplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Cybercrimeplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Cybercrimeplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Disasterplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Disasterplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Disasterplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Disasterplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Displacementplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Displacementplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Displacementplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Displacementplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Epidemiplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Epidemiplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Epidemiplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Epidemiplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Etiplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Etiplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Etiplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Etiplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Protestplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Protestplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Protestplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Protestplate.layers.1.bias: torch.Size([2]) >>> coordinated_classifiers.Terrorplate.layers.0.weight: torch.Size([450, 768]) >>> coordinated_classifiers.Terrorplate.layers.0.bias: torch.Size([450]) >>> coordinated_classifiers.Terrorplate.layers.1.weight: torch.Size([2, 450]) >>> coordinated_classifiers.Terrorplate.layers.1.bias: torch.Size([2]) n_trainable_params: 582182328, n_nontrainable_params: 0 ---------------------------------------------------------------------------------------------------- ****************************** Epoch: 0 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 
--xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:09:11.280599: step: 4/77, loss: 1.005326271057129 2023-01-22 07:09:12.565124: step: 8/77, loss: 0.9969702959060669 2023-01-22 07:09:13.807804: step: 12/77, loss: 0.9919466376304626 2023-01-22 07:09:15.051691: step: 16/77, loss: 0.9922894239425659 2023-01-22 07:09:16.312268: step: 20/77, loss: 0.987234354019165 2023-01-22 07:09:17.587473: step: 24/77, loss: 0.995108962059021 2023-01-22 07:09:18.880358: step: 28/77, loss: 0.9919507503509521 2023-01-22 07:09:20.185018: step: 32/77, loss: 0.9804954528808594 2023-01-22 07:09:21.464416: step: 36/77, loss: 0.9758777618408203 2023-01-22 07:09:22.732925: step: 40/77, loss: 0.9724856615066528 2023-01-22 07:09:24.043836: step: 44/77, loss: 0.9727693796157837 2023-01-22 07:09:25.315233: step: 48/77, loss: 0.9606623649597168 2023-01-22 07:09:26.574015: step: 52/77, loss: 0.9492292404174805 2023-01-22 07:09:27.884759: step: 56/77, loss: 0.9366544485092163 2023-01-22 07:09:29.175470: step: 60/77, loss: 0.9421094655990601 2023-01-22 07:09:30.466705: step: 64/77, loss: 0.914215624332428 2023-01-22 07:09:31.763564: step: 68/77, loss: 0.9166408777236938 2023-01-22 07:09:33.038051: step: 72/77, loss: 0.8890451788902283 2023-01-22 07:09:34.306604: step: 76/77, loss: 0.888709306716919 2023-01-22 07:09:35.573325: step: 80/77, loss: 0.8704376220703125 2023-01-22 07:09:36.866818: step: 84/77, loss: 0.8614633083343506 2023-01-22 07:09:38.172141: step: 88/77, loss: 0.8560481071472168 2023-01-22 07:09:39.456326: step: 92/77, loss: 0.8408702611923218 2023-01-22 07:09:40.713155: step: 96/77, loss: 0.8011143803596497 2023-01-22 07:09:41.988022: step: 100/77, loss: 0.8109110593795776 2023-01-22 07:09:43.285440: step: 104/77, loss: 0.7847245335578918 2023-01-22 07:09:44.575748: step: 108/77, loss: 0.7961324453353882 2023-01-22 07:09:45.847828: step: 112/77, loss: 0.7659816145896912 2023-01-22 07:09:47.127433: step: 116/77, loss: 0.7412824034690857 2023-01-22 07:09:48.446380: step: 120/77, loss: 0.7645125985145569 2023-01-22 07:09:49.696556: step: 124/77, loss: 0.7162237167358398 2023-01-22 07:09:51.013083: step: 128/77, loss: 0.6767681837081909 2023-01-22 07:09:52.390153: step: 132/77, loss: 0.6699859499931335 2023-01-22 07:09:53.649945: step: 136/77, loss: 0.6623581051826477 2023-01-22 07:09:54.959811: step: 140/77, loss: 0.5981768369674683 2023-01-22 07:09:56.267294: step: 144/77, loss: 0.5953983068466187 2023-01-22 07:09:57.550104: step: 148/77, loss: 0.5925966501235962 2023-01-22 07:09:58.807094: step: 152/77, loss: 0.5283046364784241 2023-01-22 07:10:00.125070: step: 156/77, loss: 0.5977451205253601 2023-01-22 07:10:01.456742: step: 160/77, loss: 0.5185505747795105 2023-01-22 07:10:02.805202: step: 164/77, loss: 0.48585712909698486 2023-01-22 07:10:04.138999: step: 168/77, loss: 0.48655766248703003 2023-01-22 07:10:05.405271: step: 172/77, loss: 0.4535609185695648 2023-01-22 07:10:06.694995: step: 176/77, loss: 0.4237361252307892 2023-01-22 07:10:08.003470: step: 180/77, loss: 0.4515753388404846 2023-01-22 07:10:09.240822: step: 184/77, loss: 0.3867449164390564 2023-01-22 07:10:10.479671: step: 188/77, loss: 0.34473663568496704 2023-01-22 07:10:11.779164: step: 192/77, loss: 0.32582467794418335 2023-01-22 07:10:13.082505: step: 196/77, loss: 0.3416479825973511 2023-01-22 07:10:14.402027: step: 200/77, loss: 0.38761529326438904 2023-01-22 07:10:15.696981: step: 204/77, loss: 0.33058345317840576 2023-01-22 07:10:17.005723: step: 
208/77, loss: 0.30359816551208496 2023-01-22 07:10:18.308608: step: 212/77, loss: 0.23331186175346375 2023-01-22 07:10:19.618026: step: 216/77, loss: 0.2237580269575119 2023-01-22 07:10:20.967357: step: 220/77, loss: 0.1935083121061325 2023-01-22 07:10:22.294134: step: 224/77, loss: 0.20559267699718475 2023-01-22 07:10:23.577286: step: 228/77, loss: 0.2811965048313141 2023-01-22 07:10:24.852118: step: 232/77, loss: 0.1479192078113556 2023-01-22 07:10:26.186720: step: 236/77, loss: 0.1190163865685463 2023-01-22 07:10:27.498490: step: 240/77, loss: 0.08200334012508392 2023-01-22 07:10:28.805971: step: 244/77, loss: 0.10144772380590439 2023-01-22 07:10:30.099159: step: 248/77, loss: 0.1776786744594574 2023-01-22 07:10:31.419346: step: 252/77, loss: 0.09870608150959015 2023-01-22 07:10:32.694696: step: 256/77, loss: 0.07344207912683487 2023-01-22 07:10:33.993812: step: 260/77, loss: 0.13493704795837402 2023-01-22 07:10:35.271467: step: 264/77, loss: 0.112431600689888 2023-01-22 07:10:36.563989: step: 268/77, loss: 0.10087061673402786 2023-01-22 07:10:37.858283: step: 272/77, loss: 0.07023502141237259 2023-01-22 07:10:39.195796: step: 276/77, loss: 0.11176516115665436 2023-01-22 07:10:40.520983: step: 280/77, loss: 0.08656412363052368 2023-01-22 07:10:41.827319: step: 284/77, loss: 0.09016451239585876 2023-01-22 07:10:43.143576: step: 288/77, loss: 0.20893847942352295 2023-01-22 07:10:44.420390: step: 292/77, loss: 0.08841076493263245 2023-01-22 07:10:45.716622: step: 296/77, loss: 0.20717525482177734 2023-01-22 07:10:46.989353: step: 300/77, loss: 0.14878374338150024 2023-01-22 07:10:48.311213: step: 304/77, loss: 0.16760967671871185 2023-01-22 07:10:49.607296: step: 308/77, loss: 0.0642714574933052 2023-01-22 07:10:50.915503: step: 312/77, loss: 0.10876336693763733 2023-01-22 07:10:52.174883: step: 316/77, loss: 0.06730751693248749 2023-01-22 07:10:53.519155: step: 320/77, loss: 0.0801793783903122 2023-01-22 07:10:54.785803: step: 324/77, loss: 0.14890411496162415 2023-01-22 07:10:56.103950: step: 328/77, loss: 0.19140523672103882 2023-01-22 07:10:57.349444: step: 332/77, loss: 0.12841562926769257 2023-01-22 07:10:58.636184: step: 336/77, loss: 0.06181987002491951 2023-01-22 07:10:59.918931: step: 340/77, loss: 0.07434514164924622 2023-01-22 07:11:01.201417: step: 344/77, loss: 0.058149345219135284 2023-01-22 07:11:02.547276: step: 348/77, loss: 0.12842099368572235 2023-01-22 07:11:03.882716: step: 352/77, loss: 0.06279709935188293 2023-01-22 07:11:05.159590: step: 356/77, loss: 0.10988639295101166 2023-01-22 07:11:06.456333: step: 360/77, loss: 0.059507448226213455 2023-01-22 07:11:07.736211: step: 364/77, loss: 0.07639916241168976 2023-01-22 07:11:08.985626: step: 368/77, loss: 0.07576992362737656 2023-01-22 07:11:10.262286: step: 372/77, loss: 0.11858648806810379 2023-01-22 07:11:11.547654: step: 376/77, loss: 0.16270530223846436 2023-01-22 07:11:12.853088: step: 380/77, loss: 0.10745513439178467 2023-01-22 07:11:14.151975: step: 384/77, loss: 0.07845322042703629 2023-01-22 07:11:15.402686: step: 388/77, loss: 0.0594959557056427 ================================================== Loss: 0.449 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 
0} Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:13:14.188907: step: 4/77, loss: 0.07319396734237671 2023-01-22 07:13:15.454412: step: 8/77, loss: 0.2127627432346344 2023-01-22 07:13:16.716916: step: 12/77, loss: 0.06945189833641052 2023-01-22 07:13:17.986962: step: 16/77, loss: 0.06614106148481369 2023-01-22 07:13:19.278371: step: 20/77, loss: 0.12928959727287292 2023-01-22 07:13:20.565563: step: 24/77, loss: 0.06803569197654724 2023-01-22 07:13:21.875147: step: 28/77, loss: 0.11719777435064316 2023-01-22 07:13:23.122985: step: 32/77, loss: 0.08039490878582001 2023-01-22 07:13:24.412498: step: 36/77, loss: 0.08408656716346741 2023-01-22 07:13:25.683463: step: 40/77, loss: 0.16345737874507904 2023-01-22 07:13:26.997863: step: 44/77, loss: 0.16177189350128174 2023-01-22 07:13:28.236657: step: 48/77, loss: 0.06657033413648605 2023-01-22 07:13:29.526144: step: 52/77, loss: 0.11178885400295258 2023-01-22 07:13:30.804359: step: 56/77, loss: 0.17989614605903625 2023-01-22 07:13:32.083859: step: 60/77, loss: 0.11134673655033112 2023-01-22 07:13:33.395477: step: 64/77, loss: 0.1450967788696289 2023-01-22 07:13:34.682493: step: 68/77, loss: 
0.06846902519464493 2023-01-22 07:13:35.938673: step: 72/77, loss: 0.09169869869947433 2023-01-22 07:13:37.257855: step: 76/77, loss: 0.17338258028030396 2023-01-22 07:13:38.534092: step: 80/77, loss: 0.06149708479642868 2023-01-22 07:13:39.834423: step: 84/77, loss: 0.03807852417230606 2023-01-22 07:13:41.153021: step: 88/77, loss: 0.13195359706878662 2023-01-22 07:13:42.413870: step: 92/77, loss: 0.13806062936782837 2023-01-22 07:13:43.690028: step: 96/77, loss: 0.07052130997180939 2023-01-22 07:13:44.952889: step: 100/77, loss: 0.07880409061908722 2023-01-22 07:13:46.253194: step: 104/77, loss: 0.11023557186126709 2023-01-22 07:13:47.544438: step: 108/77, loss: 0.054397180676460266 2023-01-22 07:13:48.867280: step: 112/77, loss: 0.055380046367645264 2023-01-22 07:13:50.166666: step: 116/77, loss: 0.029421858489513397 2023-01-22 07:13:51.413675: step: 120/77, loss: 0.13612958788871765 2023-01-22 07:13:52.685547: step: 124/77, loss: 0.055370356887578964 2023-01-22 07:13:53.948355: step: 128/77, loss: 0.09766070544719696 2023-01-22 07:13:55.217726: step: 132/77, loss: 0.25343966484069824 2023-01-22 07:13:56.533720: step: 136/77, loss: 0.20380039513111115 2023-01-22 07:13:57.812143: step: 140/77, loss: 0.08381229639053345 2023-01-22 07:13:59.048434: step: 144/77, loss: 0.08572602272033691 2023-01-22 07:14:00.382651: step: 148/77, loss: 0.09957505017518997 2023-01-22 07:14:01.666408: step: 152/77, loss: 0.1330590844154358 2023-01-22 07:14:02.962338: step: 156/77, loss: 0.05906382203102112 2023-01-22 07:14:04.236667: step: 160/77, loss: 0.061597198247909546 2023-01-22 07:14:05.534520: step: 164/77, loss: 0.09688493609428406 2023-01-22 07:14:06.828052: step: 168/77, loss: 0.10050778836011887 2023-01-22 07:14:08.120764: step: 172/77, loss: 0.0780385434627533 2023-01-22 07:14:09.415682: step: 176/77, loss: 0.10098770260810852 2023-01-22 07:14:10.656501: step: 180/77, loss: 0.0765882357954979 2023-01-22 07:14:11.993891: step: 184/77, loss: 0.08834411203861237 2023-01-22 07:14:13.295862: step: 188/77, loss: 0.05788188427686691 2023-01-22 07:14:14.593172: step: 192/77, loss: 0.0765821561217308 2023-01-22 07:14:15.930288: step: 196/77, loss: 0.08416645228862762 2023-01-22 07:14:17.201104: step: 200/77, loss: 0.028178736567497253 2023-01-22 07:14:18.487303: step: 204/77, loss: 0.1760944128036499 2023-01-22 07:14:19.778897: step: 208/77, loss: 0.048954010009765625 2023-01-22 07:14:21.060711: step: 212/77, loss: 0.05932794138789177 2023-01-22 07:14:22.336555: step: 216/77, loss: 0.04491445794701576 2023-01-22 07:14:23.675220: step: 220/77, loss: 0.1340390145778656 2023-01-22 07:14:24.934715: step: 224/77, loss: 0.03974044695496559 2023-01-22 07:14:26.223568: step: 228/77, loss: 0.08148594945669174 2023-01-22 07:14:27.565201: step: 232/77, loss: 0.17769792675971985 2023-01-22 07:14:28.846552: step: 236/77, loss: 0.05620008334517479 2023-01-22 07:14:30.150795: step: 240/77, loss: 0.07260948419570923 2023-01-22 07:14:31.446339: step: 244/77, loss: 0.030221108347177505 2023-01-22 07:14:32.748276: step: 248/77, loss: 0.17984890937805176 2023-01-22 07:14:34.047138: step: 252/77, loss: 0.029455162584781647 2023-01-22 07:14:35.297436: step: 256/77, loss: 0.09766645729541779 2023-01-22 07:14:36.616048: step: 260/77, loss: 0.13616687059402466 2023-01-22 07:14:37.942719: step: 264/77, loss: 0.3544989228248596 2023-01-22 07:14:39.233224: step: 268/77, loss: 0.0440225750207901 2023-01-22 07:14:40.540746: step: 272/77, loss: 0.04961168020963669 2023-01-22 07:14:41.844268: step: 276/77, loss: 0.03362132981419563 
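Note on the loss lines in these epoch blocks: each entry follows the pattern "<timestamp>: step: <n>/<total>, loss: <value>", the step counter advances by 4 (matching --accumulate_step 4), and each epoch closes with an aggregate "Loss:" line. The loop below is a minimal sketch of code that would emit output in this format; model, loader, and optimizer are hypothetical stand-ins, not the actual objects built in train.py, and the epoch summary is assumed to be a plain average of the logged losses.

import datetime

def train_one_epoch(model, loader, optimizer, accumulate_step=4, total=77):
    # Prints "<timestamp>: step: <n>/<total>, loss: <value>" at every optimizer
    # update (one update per accumulate_step micro-batches), then an epoch
    # summary line such as "Loss: 0.449".
    model.train()
    logged = []
    for i, batch in enumerate(loader, start=1):
        loss = model(**batch)                # assumes the model returns a scalar loss
        (loss / accumulate_step).backward()  # scale so accumulated gradients average out
        if i % accumulate_step == 0:
            optimizer.step()
            optimizer.zero_grad()
            print(f"{datetime.datetime.now()}: step: {i}/{total}, loss: {loss.item()}")
            logged.append(loss.item())
    print("=" * 50)
    print(f"Loss: {sum(logged) / len(logged):.3f}")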
2023-01-22 07:14:43.164384: step: 280/77, loss: 0.06177300959825516 2023-01-22 07:14:44.491284: step: 284/77, loss: 0.09610987454652786 2023-01-22 07:14:45.809128: step: 288/77, loss: 0.10167451202869415 2023-01-22 07:14:47.142166: step: 292/77, loss: 0.07030784338712692 2023-01-22 07:14:48.426232: step: 296/77, loss: 0.12148582190275192 2023-01-22 07:14:49.738662: step: 300/77, loss: 0.11119399964809418 2023-01-22 07:14:51.040444: step: 304/77, loss: 0.0914122611284256 2023-01-22 07:14:52.304944: step: 308/77, loss: 0.03533243387937546 2023-01-22 07:14:53.626719: step: 312/77, loss: 0.06032838299870491 2023-01-22 07:14:54.937065: step: 316/77, loss: 0.045557308942079544 2023-01-22 07:14:56.197846: step: 320/77, loss: 0.07548585534095764 2023-01-22 07:14:57.487867: step: 324/77, loss: 0.09146370738744736 2023-01-22 07:14:58.802501: step: 328/77, loss: 0.09769196808338165 2023-01-22 07:15:00.091223: step: 332/77, loss: 0.14479124546051025 2023-01-22 07:15:01.369213: step: 336/77, loss: 0.11375805735588074 2023-01-22 07:15:02.705665: step: 340/77, loss: 0.10664797574281693 2023-01-22 07:15:03.995412: step: 344/77, loss: 0.07036581635475159 2023-01-22 07:15:05.315759: step: 348/77, loss: 0.10672543942928314 2023-01-22 07:15:06.562450: step: 352/77, loss: 0.0998711809515953 2023-01-22 07:15:07.877475: step: 356/77, loss: 0.04224449396133423 2023-01-22 07:15:09.165548: step: 360/77, loss: 0.08522751927375793 2023-01-22 07:15:10.509564: step: 364/77, loss: 0.07499724626541138 2023-01-22 07:15:11.768680: step: 368/77, loss: 0.09146278351545334 2023-01-22 07:15:13.089644: step: 372/77, loss: 0.11920887231826782 2023-01-22 07:15:14.412572: step: 376/77, loss: 0.14591817557811737 2023-01-22 07:15:15.666600: step: 380/77, loss: 0.13997526466846466 2023-01-22 07:15:16.975582: step: 384/77, loss: 0.07448755204677582 2023-01-22 07:15:18.241600: step: 388/77, loss: 0.10113322734832764 ================================================== Loss: 0.097 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'template': {'p': 0.0, 'r': 
0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 2 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:16:59.706255: step: 4/77, loss: 0.16292589902877808 2023-01-22 07:17:00.990286: step: 8/77, loss: 0.10868260264396667 2023-01-22 07:17:02.277335: step: 12/77, loss: 0.08619604259729385 2023-01-22 07:17:03.521942: step: 16/77, loss: 0.17940576374530792 2023-01-22 07:17:04.820242: step: 20/77, loss: 0.05281125381588936 2023-01-22 07:17:06.125807: step: 24/77, loss: 0.10502075403928757 2023-01-22 07:17:07.448691: step: 28/77, loss: 0.05390072986483574 2023-01-22 07:17:08.753758: step: 32/77, loss: 0.09048666805028915 2023-01-22 07:17:10.059019: step: 36/77, loss: 0.030864335596561432 2023-01-22 07:17:11.336417: step: 40/77, loss: 0.02246858924627304 2023-01-22 07:17:12.617735: step: 44/77, loss: 0.18404263257980347 2023-01-22 07:17:13.919236: step: 48/77, loss: 0.0281207375228405 2023-01-22 07:17:15.252777: step: 52/77, loss: 0.13492928445339203 2023-01-22 07:17:16.546842: step: 56/77, loss: 0.04565151408314705 2023-01-22 07:17:17.823306: step: 60/77, loss: 0.09567532688379288 2023-01-22 07:17:19.130129: step: 64/77, loss: 0.11220959573984146 2023-01-22 07:17:20.446589: step: 68/77, loss: 0.0386422835290432 2023-01-22 07:17:21.754356: step: 72/77, loss: 0.04394672438502312 2023-01-22 07:17:23.022842: step: 76/77, loss: 0.03471282497048378 2023-01-22 07:17:24.268226: step: 80/77, loss: 0.030496366322040558 2023-01-22 07:17:25.559850: step: 84/77, loss: 0.06462342292070389 2023-01-22 07:17:26.852282: step: 88/77, loss: 0.02463102713227272 2023-01-22 07:17:28.146361: step: 92/77, loss: 0.02454773709177971 2023-01-22 07:17:29.395413: step: 96/77, loss: 0.03570985421538353 2023-01-22 07:17:30.707039: step: 100/77, loss: 0.04813680425286293 2023-01-22 07:17:32.048001: step: 104/77, loss: 0.07638773322105408 2023-01-22 07:17:33.370894: step: 108/77, loss: 0.10734117776155472 2023-01-22 07:17:34.664872: step: 112/77, loss: 0.05339484289288521 2023-01-22 07:17:35.943294: step: 116/77, loss: 0.08813977986574173 2023-01-22 07:17:37.281263: step: 120/77, loss: 0.029325906187295914 2023-01-22 07:17:38.622521: step: 124/77, loss: 0.039254121482372284 2023-01-22 07:17:39.933314: step: 128/77, loss: 0.0848977193236351 2023-01-22 07:17:41.243739: step: 132/77, loss: 0.08072513341903687 2023-01-22 07:17:42.577020: step: 136/77, loss: 0.014954173937439919 2023-01-22 07:17:43.831097: step: 140/77, loss: 0.059734243899583817 2023-01-22 
07:17:45.069176: step: 144/77, loss: 0.00863991491496563 2023-01-22 07:17:46.387779: step: 148/77, loss: 0.020591311156749725 2023-01-22 07:17:47.701050: step: 152/77, loss: 0.12235350906848907 2023-01-22 07:17:48.996250: step: 156/77, loss: 0.0375383123755455 2023-01-22 07:17:50.283035: step: 160/77, loss: 0.028969548642635345 2023-01-22 07:17:51.598276: step: 164/77, loss: 0.031519703567028046 2023-01-22 07:17:52.900174: step: 168/77, loss: 0.06944873929023743 2023-01-22 07:17:54.209821: step: 172/77, loss: 0.015144776552915573 2023-01-22 07:17:55.490398: step: 176/77, loss: 0.018287071958184242 2023-01-22 07:17:56.740781: step: 180/77, loss: 0.1540391743183136 2023-01-22 07:17:58.094463: step: 184/77, loss: 0.07852562516927719 2023-01-22 07:17:59.481511: step: 188/77, loss: 0.03432613983750343 2023-01-22 07:18:00.787281: step: 192/77, loss: 0.023319926112890244 2023-01-22 07:18:02.097024: step: 196/77, loss: 0.00791256595402956 2023-01-22 07:18:03.414386: step: 200/77, loss: 0.008311287499964237 2023-01-22 07:18:04.698904: step: 204/77, loss: 0.031680233776569366 2023-01-22 07:18:05.976481: step: 208/77, loss: 0.033560387790203094 2023-01-22 07:18:07.294477: step: 212/77, loss: 0.22561319172382355 2023-01-22 07:18:08.635356: step: 216/77, loss: 0.02809392288327217 2023-01-22 07:18:09.952495: step: 220/77, loss: 0.01711028814315796 2023-01-22 07:18:11.204783: step: 224/77, loss: 0.04609297588467598 2023-01-22 07:18:12.526894: step: 228/77, loss: 0.16207429766654968 2023-01-22 07:18:13.812212: step: 232/77, loss: 0.04895063489675522 2023-01-22 07:18:15.105061: step: 236/77, loss: 0.1597345918416977 2023-01-22 07:18:16.389043: step: 240/77, loss: 0.08007530868053436 2023-01-22 07:18:17.681060: step: 244/77, loss: 0.0169462151825428 2023-01-22 07:18:18.967227: step: 248/77, loss: 0.01133648119866848 2023-01-22 07:18:20.240878: step: 252/77, loss: 0.018427202478051186 2023-01-22 07:18:21.528598: step: 256/77, loss: 0.018696516752243042 2023-01-22 07:18:22.822034: step: 260/77, loss: 0.07468952238559723 2023-01-22 07:18:24.149958: step: 264/77, loss: 0.07245441526174545 2023-01-22 07:18:25.500147: step: 268/77, loss: 0.00603498425334692 2023-01-22 07:18:26.801582: step: 272/77, loss: 0.015061470679938793 2023-01-22 07:18:28.059351: step: 276/77, loss: 0.0198321845382452 2023-01-22 07:18:29.367268: step: 280/77, loss: 0.020931649953126907 2023-01-22 07:18:30.670723: step: 284/77, loss: 0.0679539144039154 2023-01-22 07:18:31.948238: step: 288/77, loss: 0.07533137500286102 2023-01-22 07:18:33.230342: step: 292/77, loss: 0.07942559570074081 2023-01-22 07:18:34.528032: step: 296/77, loss: 0.05369891971349716 2023-01-22 07:18:35.790084: step: 300/77, loss: 0.11053535342216492 2023-01-22 07:18:37.102732: step: 304/77, loss: 0.015943240374326706 2023-01-22 07:18:38.351421: step: 308/77, loss: 0.0297300573438406 2023-01-22 07:18:39.655037: step: 312/77, loss: 0.005931754130870104 2023-01-22 07:18:40.928112: step: 316/77, loss: 0.03550818935036659 2023-01-22 07:18:42.188106: step: 320/77, loss: 0.026807796210050583 2023-01-22 07:18:43.430909: step: 324/77, loss: 0.0653093159198761 2023-01-22 07:18:44.755048: step: 328/77, loss: 0.1455453783273697 2023-01-22 07:18:46.138020: step: 332/77, loss: 0.039670467376708984 2023-01-22 07:18:47.482770: step: 336/77, loss: 0.0678885355591774 2023-01-22 07:18:48.753898: step: 340/77, loss: 0.016748683527112007 2023-01-22 07:18:50.043300: step: 344/77, loss: 0.023848265409469604 2023-01-22 07:18:51.362960: step: 348/77, loss: 0.02735232189297676 2023-01-22 
07:18:52.711087: step: 352/77, loss: 0.040351394563913345 2023-01-22 07:18:53.971855: step: 356/77, loss: 0.053319547325372696 2023-01-22 07:18:55.260499: step: 360/77, loss: 0.08323514461517334 2023-01-22 07:18:56.513548: step: 364/77, loss: 0.06106017902493477 2023-01-22 07:18:57.823539: step: 368/77, loss: 0.012369946576654911 2023-01-22 07:18:59.138226: step: 372/77, loss: 0.1313581019639969 2023-01-22 07:19:00.425310: step: 376/77, loss: 0.039641156792640686 2023-01-22 07:19:01.684331: step: 380/77, loss: 0.09567482769489288 2023-01-22 07:19:02.986783: step: 384/77, loss: 0.13298000395298004 2023-01-22 07:19:04.274784: step: 388/77, loss: 0.020722243934869766 ================================================== Loss: 0.060 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.2833333333333333, 'f1': 0.4415584415584416}, 'slot': {'p': 0.5, 'r': 0.003780718336483932, 'f1': 0.0075046904315197}, 'combined': 0.0033137594113203874, 'epoch': 2} Test Chinese: {'template': {'p': 0.9538461538461539, 'r': 0.4732824427480916, 'f1': 0.6326530612244898}, 'slot': {'p': 0.42857142857142855, 'r': 0.0027100271002710027, 'f1': 0.005385996409335727}, 'combined': 0.0034074671161103583, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.2833333333333333, 'f1': 0.4415584415584416}, 'slot': {'p': 0.5, 'r': 0.003780718336483932, 'f1': 0.0075046904315197}, 'combined': 0.0033137594113203874, 'epoch': 2} Test Korean: {'template': {'p': 0.953125, 'r': 0.46564885496183206, 'f1': 0.6256410256410256}, 'slot': {'p': 0.42857142857142855, 'r': 0.0027100271002710027, 'f1': 0.005385996409335727}, 'combined': 0.0033697003176356858, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.3, 'f1': 0.4615384615384615}, 'slot': {'p': 0.5, 'r': 0.005671077504725898, 'f1': 0.011214953271028037}, 'combined': 0.005176132278936017, 'epoch': 2} Test Russian: {'template': {'p': 0.9538461538461539, 'r': 0.4732824427480916, 'f1': 0.6326530612244898}, 'slot': {'p': 0.42857142857142855, 'r': 0.0027100271002710027, 'f1': 0.005385996409335727}, 'combined': 0.0034074671161103583, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} New best chinese model... New best korean model... New best russian model... 
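For reference on how the evaluation dicts above roll up: each 'f1' field is the standard harmonic mean of the printed 'p' and 'r', and every 'combined' value in these entries equals the template F1 multiplied by the slot F1, which is why a zero slot score forces 'combined' to 0.0 even when the template F1 is high. A minimal sketch of that arithmetic, checked against the epoch-2 Dev Chinese entry above (this is not code from train.py, only the relationship the logged numbers imply):

# Reproduces the logged 'f1' and 'combined' fields from the printed p/r values.
def f1(p, r):
    return 2 * p * r / (p + r) if p + r > 0 else 0.0

template = {'p': 1.0, 'r': 0.2833333333333333}        # epoch-2 Dev Chinese, template
slot = {'p': 0.5, 'r': 0.003780718336483932}          # epoch-2 Dev Chinese, slot

template_f1 = f1(**template)       # 0.4415584415584416, as logged
slot_f1 = f1(**slot)               # 0.0075046904315197, as logged
combined = template_f1 * slot_f1   # 0.0033137594113203874, as logged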
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.2833333333333333, 'f1': 0.4415584415584416}, 'slot': {'p': 0.5, 'r': 0.003780718336483932, 'f1': 0.0075046904315197}, 'combined': 0.0033137594113203874, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9538461538461539, 'r': 0.4732824427480916, 'f1': 0.6326530612244898}, 'slot': {'p': 0.42857142857142855, 'r': 0.0027100271002710027, 'f1': 0.005385996409335727}, 'combined': 0.0034074671161103583, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.2833333333333333, 'f1': 0.4415584415584416}, 'slot': {'p': 0.5, 'r': 0.003780718336483932, 'f1': 0.0075046904315197}, 'combined': 0.0033137594113203874, 'epoch': 2} Test for Korean: {'template': {'p': 0.953125, 'r': 0.46564885496183206, 'f1': 0.6256410256410256}, 'slot': {'p': 0.42857142857142855, 'r': 0.0027100271002710027, 'f1': 0.005385996409335727}, 'combined': 0.0033697003176356858, 'epoch': 2} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.3, 'f1': 0.4615384615384615}, 'slot': {'p': 0.5, 'r': 0.005671077504725898, 'f1': 0.011214953271028037}, 'combined': 0.005176132278936017, 'epoch': 2} Test for Russian: {'template': {'p': 0.9538461538461539, 'r': 0.4732824427480916, 'f1': 0.6326530612244898}, 'slot': {'p': 0.42857142857142855, 'r': 0.0027100271002710027, 'f1': 0.005385996409335727}, 'combined': 0.0034074671161103583, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:21:12.866353: step: 4/77, loss: 0.05876729637384415 2023-01-22 07:21:14.147666: step: 8/77, loss: 0.007638957351446152 2023-01-22 07:21:15.446455: step: 12/77, loss: 0.017381764948368073 2023-01-22 07:21:16.715694: step: 16/77, loss: 0.05007513239979744 2023-01-22 07:21:18.006908: step: 20/77, loss: 0.0335371233522892 2023-01-22 07:21:19.296326: step: 24/77, loss: 0.012025022879242897 2023-01-22 07:21:20.594468: step: 28/77, loss: 0.027347933501005173 2023-01-22 07:21:21.892650: step: 32/77, loss: 0.00734763452783227 2023-01-22 07:21:23.217998: step: 36/77, loss: 0.04710661992430687 2023-01-22 07:21:24.501922: step: 40/77, loss: 0.025218769907951355 2023-01-22 07:21:25.790942: step: 44/77, loss: 0.04439151659607887 2023-01-22 07:21:27.150609: step: 48/77, loss: 0.010919013060629368 2023-01-22 07:21:28.453078: step: 52/77, loss: 0.12057239562273026 2023-01-22 07:21:29.722621: step: 56/77, loss: 0.022180480882525444 2023-01-22 07:21:30.981572: step: 60/77, loss: 0.024242157116532326 2023-01-22 07:21:32.312730: step: 64/77, loss: 0.03236313909292221 2023-01-22 07:21:33.624491: step: 68/77, loss: 0.07304063439369202 2023-01-22 07:21:34.897539: step: 72/77, loss: 0.00362603017129004 2023-01-22 07:21:36.219152: step: 76/77, loss: 0.08075417578220367 2023-01-22 07:21:37.508067: step: 80/77, loss: 0.014852027408778667 2023-01-22 07:21:38.800330: step: 84/77, loss: 
0.003454985562711954 2023-01-22 07:21:40.105208: step: 88/77, loss: 0.0198269821703434 2023-01-22 07:21:41.434609: step: 92/77, loss: 0.019170943647623062 2023-01-22 07:21:42.691434: step: 96/77, loss: 0.02982323244214058 2023-01-22 07:21:43.994620: step: 100/77, loss: 0.026217911392450333 2023-01-22 07:21:45.261860: step: 104/77, loss: 0.04693746566772461 2023-01-22 07:21:46.517375: step: 108/77, loss: 0.025034580379724503 2023-01-22 07:21:47.789642: step: 112/77, loss: 0.041843514889478683 2023-01-22 07:21:49.037286: step: 116/77, loss: 0.04162782058119774 2023-01-22 07:21:50.343979: step: 120/77, loss: 0.023862779140472412 2023-01-22 07:21:51.643333: step: 124/77, loss: 0.016180217266082764 2023-01-22 07:21:52.933434: step: 128/77, loss: 0.014978856779634953 2023-01-22 07:21:54.261208: step: 132/77, loss: 0.012652370147407055 2023-01-22 07:21:55.624856: step: 136/77, loss: 0.04716186225414276 2023-01-22 07:21:56.860695: step: 140/77, loss: 0.008099338971078396 2023-01-22 07:21:58.142111: step: 144/77, loss: 0.006883854046463966 2023-01-22 07:21:59.431855: step: 148/77, loss: 0.08612009137868881 2023-01-22 07:22:00.708684: step: 152/77, loss: 0.004275473766028881 2023-01-22 07:22:01.995942: step: 156/77, loss: 0.006404031068086624 2023-01-22 07:22:03.292179: step: 160/77, loss: 0.06812773644924164 2023-01-22 07:22:04.578774: step: 164/77, loss: 0.038200266659259796 2023-01-22 07:22:05.828889: step: 168/77, loss: 0.004217622336000204 2023-01-22 07:22:07.133328: step: 172/77, loss: 0.03938934579491615 2023-01-22 07:22:08.427814: step: 176/77, loss: 0.02855922468006611 2023-01-22 07:22:09.736065: step: 180/77, loss: 0.029067791998386383 2023-01-22 07:22:11.032186: step: 184/77, loss: 0.016929026693105698 2023-01-22 07:22:12.316684: step: 188/77, loss: 0.18994294106960297 2023-01-22 07:22:13.624372: step: 192/77, loss: 0.10848405212163925 2023-01-22 07:22:14.870480: step: 196/77, loss: 0.03094695694744587 2023-01-22 07:22:16.174840: step: 200/77, loss: 0.017419282346963882 2023-01-22 07:22:17.467478: step: 204/77, loss: 0.09706324338912964 2023-01-22 07:22:18.762573: step: 208/77, loss: 0.019700979813933372 2023-01-22 07:22:20.079156: step: 212/77, loss: 0.010418681427836418 2023-01-22 07:22:21.387349: step: 216/77, loss: 0.015615657903254032 2023-01-22 07:22:22.698555: step: 220/77, loss: 0.01729726418852806 2023-01-22 07:22:24.009205: step: 224/77, loss: 0.028866499662399292 2023-01-22 07:22:25.270138: step: 228/77, loss: 0.03619959205389023 2023-01-22 07:22:26.578202: step: 232/77, loss: 0.047028932720422745 2023-01-22 07:22:27.878596: step: 236/77, loss: 0.024705318734049797 2023-01-22 07:22:29.196379: step: 240/77, loss: 0.0075832996517419815 2023-01-22 07:22:30.470117: step: 244/77, loss: 0.024004830047488213 2023-01-22 07:22:31.731258: step: 248/77, loss: 0.033208757638931274 2023-01-22 07:22:33.018330: step: 252/77, loss: 0.03347267583012581 2023-01-22 07:22:34.326840: step: 256/77, loss: 0.015569946728646755 2023-01-22 07:22:35.640529: step: 260/77, loss: 0.22618597745895386 2023-01-22 07:22:36.924791: step: 264/77, loss: 0.04095214605331421 2023-01-22 07:22:38.237669: step: 268/77, loss: 0.01501399651169777 2023-01-22 07:22:39.552653: step: 272/77, loss: 0.06192564591765404 2023-01-22 07:22:40.847321: step: 276/77, loss: 0.01665923185646534 2023-01-22 07:22:42.166362: step: 280/77, loss: 0.04294333606958389 2023-01-22 07:22:43.413310: step: 284/77, loss: 0.024771392345428467 2023-01-22 07:22:44.685087: step: 288/77, loss: 0.07365136593580246 2023-01-22 07:22:45.951573: step: 
292/77, loss: 0.014921758323907852 2023-01-22 07:22:47.277145: step: 296/77, loss: 0.06613760441541672 2023-01-22 07:22:48.594088: step: 300/77, loss: 0.026917394250631332 2023-01-22 07:22:49.898047: step: 304/77, loss: 0.010461905039846897 2023-01-22 07:22:51.178564: step: 308/77, loss: 0.019604841247200966 2023-01-22 07:22:52.489066: step: 312/77, loss: 0.008131084032356739 2023-01-22 07:22:53.747261: step: 316/77, loss: 0.018199313431978226 2023-01-22 07:22:55.079196: step: 320/77, loss: 0.024113517254590988 2023-01-22 07:22:56.406049: step: 324/77, loss: 0.007118526380509138 2023-01-22 07:22:57.747285: step: 328/77, loss: 0.046088460832834244 2023-01-22 07:22:59.033859: step: 332/77, loss: 0.008069220930337906 2023-01-22 07:23:00.328261: step: 336/77, loss: 0.010732055641710758 2023-01-22 07:23:01.653722: step: 340/77, loss: 0.01751190796494484 2023-01-22 07:23:02.929879: step: 344/77, loss: 0.0584302619099617 2023-01-22 07:23:04.156608: step: 348/77, loss: 0.01757621206343174 2023-01-22 07:23:05.469280: step: 352/77, loss: 0.03035123646259308 2023-01-22 07:23:06.768967: step: 356/77, loss: 0.06061801314353943 2023-01-22 07:23:08.074945: step: 360/77, loss: 0.052744798362255096 2023-01-22 07:23:09.348676: step: 364/77, loss: 0.04332219436764717 2023-01-22 07:23:10.681027: step: 368/77, loss: 0.06133495271205902 2023-01-22 07:23:11.942304: step: 372/77, loss: 0.014132341369986534 2023-01-22 07:23:13.220536: step: 376/77, loss: 0.018833355978131294 2023-01-22 07:23:14.484531: step: 380/77, loss: 0.01984577253460884 2023-01-22 07:23:15.786911: step: 384/77, loss: 0.013279399834573269 2023-01-22 07:23:17.089391: step: 388/77, loss: 0.06788233667612076 ================================================== Loss: 0.035 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5038167938931297, 'f1': 0.6534653465346535}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.017121362791300963, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 
'epoch': 3} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Russian: {'template': {'p': 0.9295774647887324, 'r': 0.5038167938931297, 'f1': 0.6534653465346535}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.017121362791300963, 'epoch': 3} Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} ****************************** Epoch: 4 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:25:25.628155: step: 4/77, loss: 0.017973169684410095 2023-01-22 07:25:26.951569: step: 8/77, loss: 0.032591622322797775 2023-01-22 07:25:28.206482: step: 12/77, loss: 0.027060119435191154 2023-01-22 07:25:29.497620: step: 16/77, loss: 0.009688153862953186 2023-01-22 07:25:30.823518: step: 20/77, loss: 0.05535442754626274 2023-01-22 07:25:32.082202: step: 24/77, loss: 0.016799340024590492 2023-01-22 07:25:33.363942: step: 28/77, loss: 0.028497029095888138 2023-01-22 07:25:34.662886: step: 32/77, loss: 0.016341451555490494 2023-01-22 07:25:35.949253: step: 36/77, loss: 0.018108392134308815 2023-01-22 07:25:37.186604: step: 40/77, loss: 0.0141389025375247 2023-01-22 07:25:38.512340: step: 44/77, loss: 0.0389748252928257 2023-01-22 07:25:39.825154: step: 48/77, loss: 0.059972554445266724 2023-01-22 07:25:41.098004: step: 52/77, loss: 0.08551490306854248 2023-01-22 07:25:42.348976: step: 56/77, loss: 0.01003539189696312 2023-01-22 07:25:43.646707: step: 60/77, loss: 0.08415193110704422 2023-01-22 07:25:44.901533: step: 64/77, loss: 0.05124989524483681 2023-01-22 07:25:46.161024: step: 68/77, loss: 0.028651192784309387 2023-01-22 07:25:47.467678: step: 72/77, loss: 0.06674402952194214 2023-01-22 07:25:48.758194: step: 76/77, loss: 
0.0061377864331007 2023-01-22 07:25:50.076668: step: 80/77, loss: 0.0033220085315406322 2023-01-22 07:25:51.431371: step: 84/77, loss: 0.0645206943154335 2023-01-22 07:25:52.720719: step: 88/77, loss: 0.007097979541867971 2023-01-22 07:25:54.021631: step: 92/77, loss: 0.07385362684726715 2023-01-22 07:25:55.299465: step: 96/77, loss: 0.024214647710323334 2023-01-22 07:25:56.593987: step: 100/77, loss: 0.03102937713265419 2023-01-22 07:25:57.929619: step: 104/77, loss: 0.0496261827647686 2023-01-22 07:25:59.244516: step: 108/77, loss: 0.022620979696512222 2023-01-22 07:26:00.514123: step: 112/77, loss: 0.05239538848400116 2023-01-22 07:26:01.795954: step: 116/77, loss: 0.032066408544778824 2023-01-22 07:26:03.095192: step: 120/77, loss: 0.0036533609963953495 2023-01-22 07:26:04.398175: step: 124/77, loss: 0.013895252719521523 2023-01-22 07:26:05.634056: step: 128/77, loss: 0.043253764510154724 2023-01-22 07:26:06.927718: step: 132/77, loss: 0.0052756816148757935 2023-01-22 07:26:08.216869: step: 136/77, loss: 0.04925745353102684 2023-01-22 07:26:09.486656: step: 140/77, loss: 0.032040733844041824 2023-01-22 07:26:10.779475: step: 144/77, loss: 0.023572130128741264 2023-01-22 07:26:12.061127: step: 148/77, loss: 0.024410491809248924 2023-01-22 07:26:13.348151: step: 152/77, loss: 0.009820953011512756 2023-01-22 07:26:14.604586: step: 156/77, loss: 0.045823048800230026 2023-01-22 07:26:15.878880: step: 160/77, loss: 0.005057765636593103 2023-01-22 07:26:17.204049: step: 164/77, loss: 0.03875165060162544 2023-01-22 07:26:18.532811: step: 168/77, loss: 0.025209451094269753 2023-01-22 07:26:19.851875: step: 172/77, loss: 0.02618713490664959 2023-01-22 07:26:21.195702: step: 176/77, loss: 0.028252843767404556 2023-01-22 07:26:22.473302: step: 180/77, loss: 0.004084436688572168 2023-01-22 07:26:23.749378: step: 184/77, loss: 0.019054951146245003 2023-01-22 07:26:25.059116: step: 188/77, loss: 0.012868019752204418 2023-01-22 07:26:26.403399: step: 192/77, loss: 0.06383177638053894 2023-01-22 07:26:27.743088: step: 196/77, loss: 0.016221703961491585 2023-01-22 07:26:29.034398: step: 200/77, loss: 0.021557895466685295 2023-01-22 07:26:30.325069: step: 204/77, loss: 0.00647857878357172 2023-01-22 07:26:31.605810: step: 208/77, loss: 0.021689537912607193 2023-01-22 07:26:32.907363: step: 212/77, loss: 0.01842329651117325 2023-01-22 07:26:34.209991: step: 216/77, loss: 0.02668784372508526 2023-01-22 07:26:35.508871: step: 220/77, loss: 0.01759856380522251 2023-01-22 07:26:36.834085: step: 224/77, loss: 0.007021921221166849 2023-01-22 07:26:38.140763: step: 228/77, loss: 0.029395049437880516 2023-01-22 07:26:39.442743: step: 232/77, loss: 0.012759885750710964 2023-01-22 07:26:40.747790: step: 236/77, loss: 0.00871328730136156 2023-01-22 07:26:42.075572: step: 240/77, loss: 0.02408706396818161 2023-01-22 07:26:43.353238: step: 244/77, loss: 0.046969473361968994 2023-01-22 07:26:44.662956: step: 248/77, loss: 0.060625314712524414 2023-01-22 07:26:45.929167: step: 252/77, loss: 0.058289043605327606 2023-01-22 07:26:47.223334: step: 256/77, loss: 0.0016667278250679374 2023-01-22 07:26:48.497365: step: 260/77, loss: 0.028811514377593994 2023-01-22 07:26:49.802206: step: 264/77, loss: 0.0719747543334961 2023-01-22 07:26:51.158372: step: 268/77, loss: 0.020027348771691322 2023-01-22 07:26:52.426385: step: 272/77, loss: 0.07541616261005402 2023-01-22 07:26:53.739224: step: 276/77, loss: 0.023574626073241234 2023-01-22 07:26:55.016708: step: 280/77, loss: 0.04856455698609352 2023-01-22 07:26:56.298112: step: 
284/77, loss: 0.09293892234563828 2023-01-22 07:26:57.567042: step: 288/77, loss: 0.11887235194444656 2023-01-22 07:26:58.832526: step: 292/77, loss: 0.00804845243692398 2023-01-22 07:27:00.089415: step: 296/77, loss: 0.05419022589921951 2023-01-22 07:27:01.374494: step: 300/77, loss: 0.017192896455526352 2023-01-22 07:27:02.666520: step: 304/77, loss: 0.010086746886372566 2023-01-22 07:27:03.983463: step: 308/77, loss: 0.08471477776765823 2023-01-22 07:27:05.269740: step: 312/77, loss: 0.07080481946468353 2023-01-22 07:27:06.534783: step: 316/77, loss: 0.018073439598083496 2023-01-22 07:27:07.834114: step: 320/77, loss: 0.11502989381551743 2023-01-22 07:27:09.140120: step: 324/77, loss: 0.004280322231352329 2023-01-22 07:27:10.481646: step: 328/77, loss: 0.015781881287693977 2023-01-22 07:27:11.743599: step: 332/77, loss: 0.13091981410980225 2023-01-22 07:27:13.015577: step: 336/77, loss: 0.05560621619224548 2023-01-22 07:27:14.280253: step: 340/77, loss: 0.030395524576306343 2023-01-22 07:27:15.613094: step: 344/77, loss: 0.014790229499340057 2023-01-22 07:27:16.915618: step: 348/77, loss: 0.003662054194137454 2023-01-22 07:27:18.228116: step: 352/77, loss: 0.04369152709841728 2023-01-22 07:27:19.538968: step: 356/77, loss: 0.1887076199054718 2023-01-22 07:27:20.836246: step: 360/77, loss: 0.054661963135004044 2023-01-22 07:27:22.106749: step: 364/77, loss: 0.022252492606639862 2023-01-22 07:27:23.413600: step: 368/77, loss: 0.09166667610406876 2023-01-22 07:27:24.710702: step: 372/77, loss: 0.020127348601818085 2023-01-22 07:27:25.970292: step: 376/77, loss: 0.00931414496153593 2023-01-22 07:27:27.261212: step: 380/77, loss: 0.03850318491458893 2023-01-22 07:27:28.568778: step: 384/77, loss: 0.042095109820365906 2023-01-22 07:27:29.865393: step: 388/77, loss: 0.016215737909078598 ================================================== Loss: 0.036 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Chinese: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Korean: {'template': {'p': 0.926829268292683, 'r': 0.5801526717557252, 'f1': 0.7136150234741785}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012608039283996087, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 
'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 5 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:29:20.106643: step: 4/77, loss: 0.014158310368657112 2023-01-22 07:29:21.382828: step: 8/77, loss: 0.04366450011730194 2023-01-22 07:29:22.666251: step: 12/77, loss: 0.002636625897139311 2023-01-22 07:29:23.966444: step: 16/77, loss: 0.022517450153827667 2023-01-22 07:29:25.203625: step: 20/77, loss: 0.020733755081892014 2023-01-22 07:29:26.533899: step: 24/77, loss: 0.1309209167957306 2023-01-22 07:29:27.836958: step: 28/77, loss: 0.005739598069339991 2023-01-22 07:29:29.123033: step: 32/77, loss: 0.002989714965224266 2023-01-22 07:29:30.431512: step: 36/77, loss: 0.046741776168346405 2023-01-22 07:29:31.707481: step: 40/77, loss: 0.1020488366484642 2023-01-22 07:29:32.949878: step: 44/77, loss: 0.013086818158626556 2023-01-22 07:29:34.246746: step: 48/77, loss: 0.014181993901729584 2023-01-22 07:29:35.533182: step: 52/77, loss: 0.04863457381725311 2023-01-22 07:29:36.842892: step: 56/77, loss: 0.004329562187194824 2023-01-22 07:29:38.127993: step: 60/77, loss: 0.04907675087451935 2023-01-22 07:29:39.413634: step: 64/77, loss: 0.0503888800740242 2023-01-22 07:29:40.688032: step: 68/77, loss: 
0.02209595963358879 2023-01-22 07:29:41.959988: step: 72/77, loss: 0.021710550412535667 2023-01-22 07:29:43.257810: step: 76/77, loss: 0.013041822239756584 2023-01-22 07:29:44.572496: step: 80/77, loss: 0.004665164276957512 2023-01-22 07:29:45.828582: step: 84/77, loss: 0.03621523827314377 2023-01-22 07:29:47.137419: step: 88/77, loss: 0.0018241109792143106 2023-01-22 07:29:48.406376: step: 92/77, loss: 0.06880193203687668 2023-01-22 07:29:49.749772: step: 96/77, loss: 0.018401481211185455 2023-01-22 07:29:51.115547: step: 100/77, loss: 0.02690611407160759 2023-01-22 07:29:52.435895: step: 104/77, loss: 0.039673320949077606 2023-01-22 07:29:53.746798: step: 108/77, loss: 0.014530510641634464 2023-01-22 07:29:55.057119: step: 112/77, loss: 0.042198315262794495 2023-01-22 07:29:56.355193: step: 116/77, loss: 0.011293873190879822 2023-01-22 07:29:57.698590: step: 120/77, loss: 0.004281324800103903 2023-01-22 07:29:59.026661: step: 124/77, loss: 0.006314275786280632 2023-01-22 07:30:00.346307: step: 128/77, loss: 0.004242511931806803 2023-01-22 07:30:01.650030: step: 132/77, loss: 0.0036362670361995697 2023-01-22 07:30:02.884195: step: 136/77, loss: 0.053869958966970444 2023-01-22 07:30:04.147419: step: 140/77, loss: 0.033010292798280716 2023-01-22 07:30:05.483879: step: 144/77, loss: 0.028418347239494324 2023-01-22 07:30:06.776773: step: 148/77, loss: 0.03635575622320175 2023-01-22 07:30:08.106342: step: 152/77, loss: 0.00185483624227345 2023-01-22 07:30:09.431712: step: 156/77, loss: 0.025224221870303154 2023-01-22 07:30:10.758067: step: 160/77, loss: 0.02771889604628086 2023-01-22 07:30:12.048641: step: 164/77, loss: 0.030288076028227806 2023-01-22 07:30:13.378736: step: 168/77, loss: 0.0323544517159462 2023-01-22 07:30:14.670641: step: 172/77, loss: 0.019649991765618324 2023-01-22 07:30:15.972425: step: 176/77, loss: 0.09695424884557724 2023-01-22 07:30:17.247913: step: 180/77, loss: 0.03970777988433838 2023-01-22 07:30:18.520275: step: 184/77, loss: 0.008301706984639168 2023-01-22 07:30:19.752759: step: 188/77, loss: 0.003423915943130851 2023-01-22 07:30:21.084420: step: 192/77, loss: 0.022232314571738243 2023-01-22 07:30:22.373600: step: 196/77, loss: 0.004373743664473295 2023-01-22 07:30:23.674707: step: 200/77, loss: 0.13403457403182983 2023-01-22 07:30:24.947497: step: 204/77, loss: 0.0042198095470666885 2023-01-22 07:30:26.274074: step: 208/77, loss: 0.0686882734298706 2023-01-22 07:30:27.590870: step: 212/77, loss: 0.000655624084174633 2023-01-22 07:30:28.839818: step: 216/77, loss: 0.04420031979680061 2023-01-22 07:30:30.161570: step: 220/77, loss: 0.01031609158962965 2023-01-22 07:30:31.473999: step: 224/77, loss: 0.002405523555353284 2023-01-22 07:30:32.815932: step: 228/77, loss: 0.02606010064482689 2023-01-22 07:30:34.079402: step: 232/77, loss: 0.016452355310320854 2023-01-22 07:30:35.364406: step: 236/77, loss: 0.0966537743806839 2023-01-22 07:30:36.653757: step: 240/77, loss: 0.015614238567650318 2023-01-22 07:30:37.908536: step: 244/77, loss: 0.017007920891046524 2023-01-22 07:30:39.231725: step: 248/77, loss: 0.03029618225991726 2023-01-22 07:30:40.544833: step: 252/77, loss: 0.013531284406781197 2023-01-22 07:30:41.870275: step: 256/77, loss: 0.01680459827184677 2023-01-22 07:30:43.206747: step: 260/77, loss: 0.024821948260068893 2023-01-22 07:30:44.528789: step: 264/77, loss: 0.03455809876322746 2023-01-22 07:30:45.781192: step: 268/77, loss: 0.04500247538089752 2023-01-22 07:30:47.119787: step: 272/77, loss: 0.10835660248994827 2023-01-22 07:30:48.451463: step: 276/77, 
loss: 0.006907912902534008 2023-01-22 07:30:49.764948: step: 280/77, loss: 0.07391379773616791 2023-01-22 07:30:51.074916: step: 284/77, loss: 0.018085170537233353 2023-01-22 07:30:52.402490: step: 288/77, loss: 0.007498640567064285 2023-01-22 07:30:53.731706: step: 292/77, loss: 0.001336018554866314 2023-01-22 07:30:55.067641: step: 296/77, loss: 0.0006786617450416088 2023-01-22 07:30:56.382934: step: 300/77, loss: 0.010314485989511013 2023-01-22 07:30:57.702892: step: 304/77, loss: 0.02813785895705223 2023-01-22 07:30:59.024628: step: 308/77, loss: 0.013725925236940384 2023-01-22 07:31:00.320509: step: 312/77, loss: 0.005271845497190952 2023-01-22 07:31:01.614381: step: 316/77, loss: 0.02690902352333069 2023-01-22 07:31:02.881387: step: 320/77, loss: 0.04803672432899475 2023-01-22 07:31:04.196167: step: 324/77, loss: 0.05544379726052284 2023-01-22 07:31:05.532100: step: 328/77, loss: 0.021013982594013214 2023-01-22 07:31:06.820187: step: 332/77, loss: 0.06210104376077652 2023-01-22 07:31:08.129440: step: 336/77, loss: 0.010880689136683941 2023-01-22 07:31:09.449025: step: 340/77, loss: 0.017330151051282883 2023-01-22 07:31:10.789554: step: 344/77, loss: 0.1314152628183365 2023-01-22 07:31:12.038061: step: 348/77, loss: 0.006899089552462101 2023-01-22 07:31:13.349203: step: 352/77, loss: 0.019113019108772278 2023-01-22 07:31:14.691306: step: 356/77, loss: 0.0011101725976914167 2023-01-22 07:31:15.957273: step: 360/77, loss: 0.01264708861708641 2023-01-22 07:31:17.226433: step: 364/77, loss: 0.04578924551606178 2023-01-22 07:31:18.541328: step: 368/77, loss: 0.0057904161512851715 2023-01-22 07:31:19.839026: step: 372/77, loss: 0.004603937268257141 2023-01-22 07:31:21.122739: step: 376/77, loss: 0.02703516185283661 2023-01-22 07:31:22.425910: step: 380/77, loss: 0.009556187316775322 2023-01-22 07:31:23.728600: step: 384/77, loss: 0.017396705225110054 2023-01-22 07:31:25.066470: step: 388/77, loss: 0.0022095218300819397 ================================================== Loss: 0.029 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Chinese: {'template': {'p': 0.9466666666666667, 'r': 0.5419847328244275, 'f1': 0.6893203883495146}, 'slot': {'p': 0.5, 'r': 0.00993676603432701, 'f1': 0.01948627103631532}, 'combined': 0.013432283918236776, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Korean: {'template': {'p': 0.9342105263157895, 'r': 0.5419847328244275, 'f1': 0.6859903381642513}, 'slot': {'p': 0.4782608695652174, 'r': 0.00993676603432701, 'f1': 0.019469026548672566}, 'combined': 0.01335556410585268, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 5} Test Russian: {'template': {'p': 0.9466666666666667, 'r': 0.5419847328244275, 'f1': 0.6893203883495146}, 'slot': {'p': 0.4782608695652174, 'r': 0.00993676603432701, 'f1': 0.019469026548672566}, 'combined': 0.013420396941317983, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} 
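The per-step lines ("<timestamp>: step: N/77, loss: X") and the per-epoch evaluation dicts are regular enough that they can be pulled back out of the raw log text, for example to plot the loss curve or compare epochs. A small stand-alone helper along those lines is sketched below; it is not part of train.py, and the log file name is only a placeholder:

# Extracts (timestamp, step, loss) tuples and the Dev/Test/Sample evaluation
# dicts from a raw log in the format shown above.
import ast
import re

STEP_RE = re.compile(
    r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+): step: (\d+)/\d+, loss: ([\d.]+)"
)
EVAL_RE = re.compile(
    r"(Dev|Test|Sample) (Chinese|Korean|Russian): (\{.*?'epoch': \d+\})", re.DOTALL
)

def parse_log(text):
    steps = [(ts, int(step), float(loss)) for ts, step, loss in STEP_RE.findall(text)]
    evals = [(split, lang, ast.literal_eval(d)) for split, lang, d in EVAL_RE.findall(text)]
    return steps, evals

with open("train_template_xlmr_large.log") as fh:   # placeholder file name
    steps, evals = parse_log(fh.read())
print(len(steps), "step entries,", len(evals), "evaluation entries")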
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 6 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:33:09.432938: step: 4/77, loss: 0.0183593537658453 2023-01-22 07:33:10.692176: step: 8/77, loss: 0.005260112229734659 2023-01-22 07:33:11.948636: step: 12/77, loss: 0.0007216347148641944 2023-01-22 07:33:13.225934: step: 16/77, loss: 0.051519881933927536 2023-01-22 07:33:14.421186: step: 20/77, loss: 0.002809650730341673 2023-01-22 07:33:15.692595: step: 24/77, loss: 0.06290657073259354 2023-01-22 07:33:16.965959: step: 28/77, loss: 0.028903154656291008 2023-01-22 07:33:18.262005: step: 32/77, loss: 0.0008781985379755497 2023-01-22 07:33:19.549965: step: 36/77, loss: 0.01905008964240551 2023-01-22 07:33:20.871527: step: 40/77, loss: 0.015037070028483868 2023-01-22 07:33:22.165692: step: 44/77, loss: 0.026022832840681076 2023-01-22 07:33:23.418595: step: 48/77, loss: 0.002238120650872588 2023-01-22 07:33:24.754618: step: 52/77, loss: 0.01229383796453476 2023-01-22 07:33:26.059226: step: 56/77, loss: 0.03464057669043541 2023-01-22 07:33:27.293228: step: 
60/77, loss: 0.0020629707723855972 2023-01-22 07:33:28.578574: step: 64/77, loss: 0.02100500650703907 2023-01-22 07:33:29.855991: step: 68/77, loss: 0.09934692829847336 2023-01-22 07:33:31.163075: step: 72/77, loss: 0.022709909826517105 2023-01-22 07:33:32.485369: step: 76/77, loss: 0.007556264288723469 2023-01-22 07:33:33.762884: step: 80/77, loss: 0.002152753295376897 2023-01-22 07:33:35.094433: step: 84/77, loss: 0.017157964408397675 2023-01-22 07:33:36.365250: step: 88/77, loss: 0.021427417173981667 2023-01-22 07:33:37.715272: step: 92/77, loss: 0.000519811874255538 2023-01-22 07:33:39.009467: step: 96/77, loss: 0.04389156028628349 2023-01-22 07:33:40.324845: step: 100/77, loss: 0.030964136123657227 2023-01-22 07:33:41.655094: step: 104/77, loss: 0.03548784181475639 2023-01-22 07:33:42.966717: step: 108/77, loss: 0.04955758899450302 2023-01-22 07:33:44.276823: step: 112/77, loss: 0.01200494822114706 2023-01-22 07:33:45.570817: step: 116/77, loss: 0.09407459199428558 2023-01-22 07:33:46.849818: step: 120/77, loss: 0.0007696656975895166 2023-01-22 07:33:48.122758: step: 124/77, loss: 0.0031200533267110586 2023-01-22 07:33:49.417550: step: 128/77, loss: 0.01818021386861801 2023-01-22 07:33:50.774803: step: 132/77, loss: 0.010161369107663631 2023-01-22 07:33:52.071721: step: 136/77, loss: 0.02967633120715618 2023-01-22 07:33:53.372805: step: 140/77, loss: 0.037315886467695236 2023-01-22 07:33:54.668367: step: 144/77, loss: 0.02900487184524536 2023-01-22 07:33:55.979283: step: 148/77, loss: 0.0012069009244441986 2023-01-22 07:33:57.240026: step: 152/77, loss: 0.0053014555014669895 2023-01-22 07:33:58.595232: step: 156/77, loss: 0.009256775490939617 2023-01-22 07:33:59.845335: step: 160/77, loss: 0.03802155330777168 2023-01-22 07:34:01.152264: step: 164/77, loss: 0.022010542452335358 2023-01-22 07:34:02.464581: step: 168/77, loss: 0.036441680043935776 2023-01-22 07:34:03.734749: step: 172/77, loss: 0.07211640477180481 2023-01-22 07:34:05.016156: step: 176/77, loss: 0.00906977616250515 2023-01-22 07:34:06.328593: step: 180/77, loss: 0.07605388760566711 2023-01-22 07:34:07.631299: step: 184/77, loss: 0.04105954244732857 2023-01-22 07:34:08.942989: step: 188/77, loss: 0.029035797342658043 2023-01-22 07:34:10.252252: step: 192/77, loss: 0.02307794988155365 2023-01-22 07:34:11.576630: step: 196/77, loss: 0.020407551899552345 2023-01-22 07:34:12.824714: step: 200/77, loss: 0.009717256762087345 2023-01-22 07:34:14.108153: step: 204/77, loss: 0.025658661499619484 2023-01-22 07:34:15.435156: step: 208/77, loss: 0.012879461981356144 2023-01-22 07:34:16.697796: step: 212/77, loss: 0.01909041218459606 2023-01-22 07:34:18.003544: step: 216/77, loss: 0.029446642845869064 2023-01-22 07:34:19.261139: step: 220/77, loss: 0.07164009660482407 2023-01-22 07:34:20.591181: step: 224/77, loss: 0.019298046827316284 2023-01-22 07:34:21.881087: step: 228/77, loss: 0.20703347027301788 2023-01-22 07:34:23.134643: step: 232/77, loss: 0.008604494854807854 2023-01-22 07:34:24.441122: step: 236/77, loss: 0.0033027713652700186 2023-01-22 07:34:25.693888: step: 240/77, loss: 0.028479279950261116 2023-01-22 07:34:26.993156: step: 244/77, loss: 0.020767278969287872 2023-01-22 07:34:28.267720: step: 248/77, loss: 0.0023850565776228905 2023-01-22 07:34:29.555728: step: 252/77, loss: 0.03148641437292099 2023-01-22 07:34:30.839209: step: 256/77, loss: 0.01915784925222397 2023-01-22 07:34:32.074707: step: 260/77, loss: 0.007667901925742626 2023-01-22 07:34:33.374007: step: 264/77, loss: 0.022026922553777695 2023-01-22 
07:34:34.633653: step: 268/77, loss: 0.01379552111029625 2023-01-22 07:34:35.932749: step: 272/77, loss: 0.009896479547023773 2023-01-22 07:34:37.210752: step: 276/77, loss: 0.03870394080877304 2023-01-22 07:34:38.536592: step: 280/77, loss: 0.09177344292402267 2023-01-22 07:34:39.831848: step: 284/77, loss: 0.02456502430140972 2023-01-22 07:34:41.185410: step: 288/77, loss: 0.002290900330990553 2023-01-22 07:34:42.466959: step: 292/77, loss: 0.0007645561126992106 2023-01-22 07:34:43.738291: step: 296/77, loss: 0.003591812215745449 2023-01-22 07:34:45.072417: step: 300/77, loss: 0.02337750233709812 2023-01-22 07:34:46.325016: step: 304/77, loss: 0.00023124905419535935 2023-01-22 07:34:47.624787: step: 308/77, loss: 0.017381660640239716 2023-01-22 07:34:48.955106: step: 312/77, loss: 0.024244729429483414 2023-01-22 07:34:50.260690: step: 316/77, loss: 0.0018833799986168742 2023-01-22 07:34:51.582144: step: 320/77, loss: 0.004297305829823017 2023-01-22 07:34:52.863466: step: 324/77, loss: 0.00848829373717308 2023-01-22 07:34:54.150908: step: 328/77, loss: 0.011697773821651936 2023-01-22 07:34:55.458558: step: 332/77, loss: 0.028859535232186317 2023-01-22 07:34:56.748723: step: 336/77, loss: 0.0342305451631546 2023-01-22 07:34:58.024289: step: 340/77, loss: 0.009530341252684593 2023-01-22 07:34:59.299260: step: 344/77, loss: 0.05515669286251068 2023-01-22 07:35:00.533829: step: 348/77, loss: 0.05997423827648163 2023-01-22 07:35:01.834781: step: 352/77, loss: 0.16188277304172516 2023-01-22 07:35:03.118144: step: 356/77, loss: 0.03290410712361336 2023-01-22 07:35:04.381205: step: 360/77, loss: 0.01108237449079752 2023-01-22 07:35:05.670949: step: 364/77, loss: 0.011187870055437088 2023-01-22 07:35:06.976988: step: 368/77, loss: 0.0055564031936228275 2023-01-22 07:35:08.231451: step: 372/77, loss: 0.0013651493936777115 2023-01-22 07:35:09.544056: step: 376/77, loss: 0.01341039314866066 2023-01-22 07:35:10.873488: step: 380/77, loss: 0.009387511759996414 2023-01-22 07:35:12.187944: step: 384/77, loss: 0.020141761749982834 2023-01-22 07:35:13.471232: step: 388/77, loss: 0.053573716431856155 ================================================== Loss: 0.027 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04890349201497669, 'epoch': 6} Test Chinese: {'template': {'p': 0.971830985915493, 'r': 0.5267175572519084, 'f1': 0.6831683168316832}, 'slot': {'p': 0.4642857142857143, 'r': 0.011743450767841012, 'f1': 0.022907488986784144}, 'combined': 0.015649670693941645, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04890349201497669, 'epoch': 6} Test Korean: {'template': {'p': 0.971830985915493, 'r': 0.5267175572519084, 'f1': 0.6831683168316832}, 'slot': {'p': 0.4642857142857143, 'r': 0.011743450767841012, 'f1': 0.022907488986784144}, 'combined': 0.015649670693941645, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04988944951527864, 'epoch': 6} Test Russian: {'template': {'p': 0.971830985915493, 'r': 0.5267175572519084, 'f1': 0.6831683168316832}, 'slot': {'p': 0.4642857142857143, 'r': 0.011743450767841012, 'f1': 0.022907488986784144}, 'combined': 0.015649670693941645, 'epoch': 6} Sample Chinese: 
{'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 6} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 7 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:36:54.755978: step: 4/77, loss: 0.0011636499548330903 2023-01-22 07:36:56.017151: step: 8/77, loss: 0.0012626384850591421 2023-01-22 07:36:57.299651: step: 12/77, loss: 0.017299551516771317 2023-01-22 07:36:58.569279: step: 16/77, loss: 0.0068957023322582245 2023-01-22 07:36:59.901198: step: 20/77, loss: 0.056037407368421555 2023-01-22 07:37:01.186145: step: 24/77, loss: 0.006196138449013233 2023-01-22 07:37:02.484205: step: 28/77, loss: 0.014314234256744385 2023-01-22 07:37:03.802044: step: 32/77, loss: 0.004170101601630449 2023-01-22 07:37:05.115811: step: 36/77, loss: 0.008770820684731007 2023-01-22 07:37:06.421561: step: 40/77, loss: 0.02249545231461525 2023-01-22 07:37:07.685157: step: 44/77, loss: 0.012950205244123936 2023-01-22 07:37:09.017448: step: 48/77, loss: 
0.04626951366662979 2023-01-22 07:37:10.325764: step: 52/77, loss: 0.006529450882226229 2023-01-22 07:37:11.641563: step: 56/77, loss: 0.03173508495092392 2023-01-22 07:37:12.940243: step: 60/77, loss: 0.004960111808031797 2023-01-22 07:37:14.239127: step: 64/77, loss: 0.024349186569452286 2023-01-22 07:37:15.582901: step: 68/77, loss: 0.031386882066726685 2023-01-22 07:37:16.874667: step: 72/77, loss: 0.013314485549926758 2023-01-22 07:37:18.147527: step: 76/77, loss: 0.002529819030314684 2023-01-22 07:37:19.409938: step: 80/77, loss: 0.00077056820737198 2023-01-22 07:37:20.713178: step: 84/77, loss: 0.01021594274789095 2023-01-22 07:37:22.010019: step: 88/77, loss: 0.02939591556787491 2023-01-22 07:37:23.311038: step: 92/77, loss: 0.0036745904944837093 2023-01-22 07:37:24.630795: step: 96/77, loss: 0.060694627463817596 2023-01-22 07:37:25.909415: step: 100/77, loss: 0.006574501283466816 2023-01-22 07:37:27.190690: step: 104/77, loss: 0.03242919594049454 2023-01-22 07:37:28.492953: step: 108/77, loss: 0.010640475898981094 2023-01-22 07:37:29.832544: step: 112/77, loss: 0.026455843821167946 2023-01-22 07:37:31.138382: step: 116/77, loss: 0.002888244343921542 2023-01-22 07:37:32.448244: step: 120/77, loss: 0.03597759082913399 2023-01-22 07:37:33.718743: step: 124/77, loss: 0.0699692815542221 2023-01-22 07:37:34.995628: step: 128/77, loss: 0.01892452873289585 2023-01-22 07:37:36.296899: step: 132/77, loss: 0.022072017192840576 2023-01-22 07:37:37.582766: step: 136/77, loss: 0.022971363738179207 2023-01-22 07:37:38.861398: step: 140/77, loss: 0.03502960875630379 2023-01-22 07:37:40.170135: step: 144/77, loss: 0.0037489754613488913 2023-01-22 07:37:41.449867: step: 148/77, loss: 0.0136228296905756 2023-01-22 07:37:42.730457: step: 152/77, loss: 0.016560204327106476 2023-01-22 07:37:43.957224: step: 156/77, loss: 0.01532871276140213 2023-01-22 07:37:45.265464: step: 160/77, loss: 0.02267695590853691 2023-01-22 07:37:46.585535: step: 164/77, loss: 0.02335749752819538 2023-01-22 07:37:47.861006: step: 168/77, loss: 0.003120360430330038 2023-01-22 07:37:49.181318: step: 172/77, loss: 0.010223385877907276 2023-01-22 07:37:50.470120: step: 176/77, loss: 0.041943587362766266 2023-01-22 07:37:51.780680: step: 180/77, loss: 0.0003906584461219609 2023-01-22 07:37:53.062451: step: 184/77, loss: 0.02529878355562687 2023-01-22 07:37:54.380249: step: 188/77, loss: 0.009768795222043991 2023-01-22 07:37:55.656910: step: 192/77, loss: 0.004147401079535484 2023-01-22 07:37:56.978954: step: 196/77, loss: 0.022411402314901352 2023-01-22 07:37:58.294376: step: 200/77, loss: 0.003953585401177406 2023-01-22 07:37:59.574949: step: 204/77, loss: 0.02571061998605728 2023-01-22 07:38:00.924623: step: 208/77, loss: 0.026847444474697113 2023-01-22 07:38:02.215014: step: 212/77, loss: 0.0530376061797142 2023-01-22 07:38:03.525013: step: 216/77, loss: 0.034075118601322174 2023-01-22 07:38:04.833349: step: 220/77, loss: 0.12706109881401062 2023-01-22 07:38:06.123013: step: 224/77, loss: 0.04803653433918953 2023-01-22 07:38:07.426668: step: 228/77, loss: 0.0221688412129879 2023-01-22 07:38:08.700839: step: 232/77, loss: 0.007290499284863472 2023-01-22 07:38:09.989827: step: 236/77, loss: 0.017815163359045982 2023-01-22 07:38:11.284186: step: 240/77, loss: 0.0004054011660628021 2023-01-22 07:38:12.539163: step: 244/77, loss: 0.11127576977014542 2023-01-22 07:38:13.817520: step: 248/77, loss: 0.0034016177523881197 2023-01-22 07:38:15.110779: step: 252/77, loss: 0.016864141449332237 2023-01-22 07:38:16.428626: step: 256/77, 
loss: 0.018525857478380203 2023-01-22 07:38:17.747190: step: 260/77, loss: 0.008020728826522827 2023-01-22 07:38:19.081340: step: 264/77, loss: 0.0004376893921289593 2023-01-22 07:38:20.353689: step: 268/77, loss: 0.022905485704541206 2023-01-22 07:38:21.632503: step: 272/77, loss: 0.02104055881500244 2023-01-22 07:38:22.926471: step: 276/77, loss: 0.00035322303301654756 2023-01-22 07:38:24.166985: step: 280/77, loss: 0.02914784848690033 2023-01-22 07:38:25.447219: step: 284/77, loss: 0.016437670215964317 2023-01-22 07:38:26.750626: step: 288/77, loss: 0.0516701266169548 2023-01-22 07:38:28.080560: step: 292/77, loss: 0.01641707308590412 2023-01-22 07:38:29.381339: step: 296/77, loss: 0.04560348019003868 2023-01-22 07:38:30.673669: step: 300/77, loss: 0.027226369827985764 2023-01-22 07:38:31.976436: step: 304/77, loss: 0.016126804053783417 2023-01-22 07:38:33.328247: step: 308/77, loss: 0.009214092046022415 2023-01-22 07:38:34.607902: step: 312/77, loss: 0.019477643072605133 2023-01-22 07:38:35.925071: step: 316/77, loss: 0.0008030800381675363 2023-01-22 07:38:37.197628: step: 320/77, loss: 0.03474152833223343 2023-01-22 07:38:38.534088: step: 324/77, loss: 0.0069403029046952724 2023-01-22 07:38:39.871715: step: 328/77, loss: 0.03761206567287445 2023-01-22 07:38:41.160055: step: 332/77, loss: 0.01154150441288948 2023-01-22 07:38:42.457065: step: 336/77, loss: 0.04174396023154259 2023-01-22 07:38:43.725616: step: 340/77, loss: 0.07245483994483948 2023-01-22 07:38:45.011126: step: 344/77, loss: 0.03308962658047676 2023-01-22 07:38:46.290009: step: 348/77, loss: 0.052312374114990234 2023-01-22 07:38:47.676157: step: 352/77, loss: 0.0009306677966378629 2023-01-22 07:38:49.022298: step: 356/77, loss: 0.02453486993908882 2023-01-22 07:38:50.294521: step: 360/77, loss: 0.01928197778761387 2023-01-22 07:38:51.598210: step: 364/77, loss: 0.048561591655015945 2023-01-22 07:38:52.879909: step: 368/77, loss: 0.02670525759458542 2023-01-22 07:38:54.186662: step: 372/77, loss: 0.002359720878303051 2023-01-22 07:38:55.516310: step: 376/77, loss: 0.015507223084568977 2023-01-22 07:38:56.780944: step: 380/77, loss: 0.019268155097961426 2023-01-22 07:38:58.090550: step: 384/77, loss: 0.0013935527531430125 2023-01-22 07:38:59.383225: step: 388/77, loss: 0.05123730003833771 ================================================== Loss: 0.023 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Chinese: {'template': {'p': 0.8352941176470589, 'r': 0.5419847328244275, 'f1': 0.6574074074074074}, 'slot': {'p': 0.5, 'r': 0.01084010840108401, 'f1': 0.021220159151193636}, 'combined': 0.01395028981235878, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Korean: {'template': {'p': 0.8333333333333334, 'r': 0.5343511450381679, 'f1': 0.6511627906976745}, 'slot': {'p': 0.5, 'r': 0.01084010840108401, 'f1': 0.021220159151193636}, 'combined': 0.013817778051940044, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Russian: {'template': {'p': 0.8255813953488372, 'r': 0.5419847328244275, 'f1': 0.6543778801843319}, 
'slot': {'p': 0.5, 'r': 0.01084010840108401, 'f1': 0.021220159151193636}, 'combined': 0.013886002762532242, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 8 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:40:41.101979: step: 4/77, loss: 0.03130660578608513 2023-01-22 07:40:42.419571: step: 8/77, loss: 0.06778450310230255 2023-01-22 07:40:43.726955: step: 12/77, loss: 0.011556878685951233 2023-01-22 07:40:45.002904: step: 16/77, loss: 0.009970860555768013 2023-01-22 07:40:46.271096: step: 20/77, loss: 0.00028988681151531637 2023-01-22 07:40:47.598494: step: 24/77, loss: 0.021246779710054398 2023-01-22 07:40:48.915765: step: 28/77, loss: 0.04434437304735184 2023-01-22 07:40:50.212659: step: 32/77, loss: 0.06581749022006989 2023-01-22 07:40:51.527553: step: 36/77, loss: 0.014421924948692322 2023-01-22 07:40:52.791613: step: 40/77, loss: 
0.021927639842033386 2023-01-22 07:40:54.036992: step: 44/77, loss: 0.016783470287919044 2023-01-22 07:40:55.308387: step: 48/77, loss: 0.024184659123420715 2023-01-22 07:40:56.623057: step: 52/77, loss: 0.04589436203241348 2023-01-22 07:40:57.916474: step: 56/77, loss: 0.001691319514065981 2023-01-22 07:40:59.238902: step: 60/77, loss: 0.10144390165805817 2023-01-22 07:41:00.464401: step: 64/77, loss: 0.023332679644227028 2023-01-22 07:41:01.736365: step: 68/77, loss: 0.0003049979859497398 2023-01-22 07:41:03.003173: step: 72/77, loss: 0.010500997304916382 2023-01-22 07:41:04.295539: step: 76/77, loss: 0.0020711207762360573 2023-01-22 07:41:05.571936: step: 80/77, loss: 0.03156790882349014 2023-01-22 07:41:06.820314: step: 84/77, loss: 0.010768542066216469 2023-01-22 07:41:08.102371: step: 88/77, loss: 0.006248690187931061 2023-01-22 07:41:09.370260: step: 92/77, loss: 0.006202485412359238 2023-01-22 07:41:10.611246: step: 96/77, loss: 0.002403137506917119 2023-01-22 07:41:11.872409: step: 100/77, loss: 0.05718894302845001 2023-01-22 07:41:13.191611: step: 104/77, loss: 0.0024291344452649355 2023-01-22 07:41:14.507078: step: 108/77, loss: 0.011834661476314068 2023-01-22 07:41:15.810220: step: 112/77, loss: 0.0486491434276104 2023-01-22 07:41:17.056800: step: 116/77, loss: 0.005136487074196339 2023-01-22 07:41:18.325800: step: 120/77, loss: 0.003164472058415413 2023-01-22 07:41:19.596387: step: 124/77, loss: 0.06037474051117897 2023-01-22 07:41:20.888140: step: 128/77, loss: 0.006406131200492382 2023-01-22 07:41:22.220401: step: 132/77, loss: 0.03345470502972603 2023-01-22 07:41:23.483919: step: 136/77, loss: 0.0025869226083159447 2023-01-22 07:41:24.734421: step: 140/77, loss: 0.13212698698043823 2023-01-22 07:41:26.007672: step: 144/77, loss: 0.0013358135474845767 2023-01-22 07:41:27.278287: step: 148/77, loss: 0.030007613822817802 2023-01-22 07:41:28.557075: step: 152/77, loss: 0.10049359500408173 2023-01-22 07:41:29.852108: step: 156/77, loss: 0.03069053217768669 2023-01-22 07:41:31.118101: step: 160/77, loss: 0.017465362325310707 2023-01-22 07:41:32.440825: step: 164/77, loss: 0.006294901482760906 2023-01-22 07:41:33.742455: step: 168/77, loss: 0.0030764213297516108 2023-01-22 07:41:35.053203: step: 172/77, loss: 0.033109307289123535 2023-01-22 07:41:36.367206: step: 176/77, loss: 0.11769656091928482 2023-01-22 07:41:37.677359: step: 180/77, loss: 0.04393400251865387 2023-01-22 07:41:39.016625: step: 184/77, loss: 0.00408775033429265 2023-01-22 07:41:40.329739: step: 188/77, loss: 0.010045966133475304 2023-01-22 07:41:41.663552: step: 192/77, loss: 0.041493598371744156 2023-01-22 07:41:42.993364: step: 196/77, loss: 0.005042455159127712 2023-01-22 07:41:44.326099: step: 200/77, loss: 0.042676590383052826 2023-01-22 07:41:45.601035: step: 204/77, loss: 0.08007384091615677 2023-01-22 07:41:46.902458: step: 208/77, loss: 0.03242093697190285 2023-01-22 07:41:48.185473: step: 212/77, loss: 0.0028606855776160955 2023-01-22 07:41:49.498123: step: 216/77, loss: 0.005781569518148899 2023-01-22 07:41:50.811586: step: 220/77, loss: 0.00999489612877369 2023-01-22 07:41:52.088826: step: 224/77, loss: 0.05668196454644203 2023-01-22 07:41:53.413468: step: 228/77, loss: 0.006618849001824856 2023-01-22 07:41:54.705663: step: 232/77, loss: 0.03185075893998146 2023-01-22 07:41:55.992041: step: 236/77, loss: 0.0239882729947567 2023-01-22 07:41:57.268696: step: 240/77, loss: 0.03320767357945442 2023-01-22 07:41:58.553575: step: 244/77, loss: 0.006629294715821743 2023-01-22 07:41:59.872213: step: 248/77, 
loss: 0.05239451304078102 2023-01-22 07:42:01.144557: step: 252/77, loss: 0.030738556757569313 2023-01-22 07:42:02.440062: step: 256/77, loss: 0.010103265754878521 2023-01-22 07:42:03.676427: step: 260/77, loss: 0.005290025845170021 2023-01-22 07:42:04.946466: step: 264/77, loss: 0.007733101490885019 2023-01-22 07:42:06.244810: step: 268/77, loss: 0.017247524112462997 2023-01-22 07:42:07.561466: step: 272/77, loss: 0.008979875594377518 2023-01-22 07:42:08.839313: step: 276/77, loss: 0.0008188042556867003 2023-01-22 07:42:10.124348: step: 280/77, loss: 0.008541867136955261 2023-01-22 07:42:11.449078: step: 284/77, loss: 0.018784543499350548 2023-01-22 07:42:12.728427: step: 288/77, loss: 0.001347896410152316 2023-01-22 07:42:14.027841: step: 292/77, loss: 0.005106473341584206 2023-01-22 07:42:15.320803: step: 296/77, loss: 0.0009395399829372764 2023-01-22 07:42:16.567361: step: 300/77, loss: 0.00232081301510334 2023-01-22 07:42:17.816147: step: 304/77, loss: 0.058472901582717896 2023-01-22 07:42:19.023290: step: 308/77, loss: 0.01651082932949066 2023-01-22 07:42:20.316826: step: 312/77, loss: 0.014799586497247219 2023-01-22 07:42:21.626292: step: 316/77, loss: 0.028199704363942146 2023-01-22 07:42:22.915302: step: 320/77, loss: 0.012422901578247547 2023-01-22 07:42:24.197808: step: 324/77, loss: 0.06933309882879257 2023-01-22 07:42:25.514669: step: 328/77, loss: 0.017022427171468735 2023-01-22 07:42:26.760876: step: 332/77, loss: 0.004448316525667906 2023-01-22 07:42:28.033746: step: 336/77, loss: 0.0006571123376488686 2023-01-22 07:42:29.296287: step: 340/77, loss: 0.011795529164373875 2023-01-22 07:42:30.621344: step: 344/77, loss: 0.03918634355068207 2023-01-22 07:42:31.907643: step: 348/77, loss: 0.00033447035821154714 2023-01-22 07:42:33.262957: step: 352/77, loss: 0.0402047336101532 2023-01-22 07:42:34.579963: step: 356/77, loss: 0.05152732506394386 2023-01-22 07:42:35.838907: step: 360/77, loss: 0.06429329514503479 2023-01-22 07:42:37.134541: step: 364/77, loss: 0.008129747584462166 2023-01-22 07:42:38.449987: step: 368/77, loss: 0.020626302808523178 2023-01-22 07:42:39.769774: step: 372/77, loss: 0.00031090303673408926 2023-01-22 07:42:41.096125: step: 376/77, loss: 0.013201514258980751 2023-01-22 07:42:42.370865: step: 380/77, loss: 0.0004937460180372 2023-01-22 07:42:43.656616: step: 384/77, loss: 0.017196908593177795 2023-01-22 07:42:44.984750: step: 388/77, loss: 0.08186633139848709 ================================================== Loss: 0.025 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Chinese: {'template': {'p': 0.971830985915493, 'r': 0.5267175572519084, 'f1': 0.6831683168316832}, 'slot': {'p': 0.5, 'r': 0.009033423667570008, 'f1': 0.01774622892635315}, 'combined': 0.01212366134572641, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Korean: {'template': {'p': 0.971830985915493, 'r': 0.5267175572519084, 'f1': 0.6831683168316832}, 'slot': {'p': 0.5, 'r': 0.009033423667570008, 'f1': 0.01774622892635315}, 'combined': 0.01212366134572641, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 
0.05179909351586346, 'epoch': 8} Test Russian: {'template': {'p': 0.9722222222222222, 'r': 0.5343511450381679, 'f1': 0.6896551724137931}, 'slot': {'p': 0.5, 'r': 0.009033423667570008, 'f1': 0.01774622892635315}, 'combined': 0.012238778569898724, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 9 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:44:26.534113: step: 4/77, loss: 0.0032547954469919205 2023-01-22 07:44:27.920216: step: 8/77, loss: 0.0007828651578165591 2023-01-22 07:44:29.172102: step: 12/77, loss: 0.009474514983594418 2023-01-22 07:44:30.465695: step: 16/77, loss: 0.026587102562189102 2023-01-22 07:44:31.807002: step: 20/77, loss: 0.016950704157352448 2023-01-22 07:44:33.164624: step: 24/77, loss: 0.004516863729804754 2023-01-22 07:44:34.471035: step: 28/77, loss: 0.0068637914955616 2023-01-22 07:44:35.758270: step: 32/77, loss: 
0.008322927169501781 2023-01-22 07:44:37.044483: step: 36/77, loss: 0.003309592604637146 2023-01-22 07:44:38.328577: step: 40/77, loss: 0.01013989932835102 2023-01-22 07:44:39.644142: step: 44/77, loss: 0.004847818054258823 2023-01-22 07:44:40.925286: step: 48/77, loss: 0.030007850378751755 2023-01-22 07:44:42.184963: step: 52/77, loss: 0.02581692300736904 2023-01-22 07:44:43.527832: step: 56/77, loss: 0.015968669205904007 2023-01-22 07:44:44.807952: step: 60/77, loss: 0.016311904415488243 2023-01-22 07:44:46.105879: step: 64/77, loss: 0.03854527324438095 2023-01-22 07:44:47.417816: step: 68/77, loss: 0.020433874800801277 2023-01-22 07:44:48.717472: step: 72/77, loss: 0.014859223738312721 2023-01-22 07:44:49.996357: step: 76/77, loss: 0.00025919050676748157 2023-01-22 07:44:51.264891: step: 80/77, loss: 0.02078193984925747 2023-01-22 07:44:52.507673: step: 84/77, loss: 0.014132988639175892 2023-01-22 07:44:53.808011: step: 88/77, loss: 0.012082832865417004 2023-01-22 07:44:55.049949: step: 92/77, loss: 0.0012587098171934485 2023-01-22 07:44:56.362412: step: 96/77, loss: 0.0025687245652079582 2023-01-22 07:44:57.634579: step: 100/77, loss: 0.031106337904930115 2023-01-22 07:44:58.916863: step: 104/77, loss: 0.012444172985851765 2023-01-22 07:45:00.181098: step: 108/77, loss: 0.01632830686867237 2023-01-22 07:45:01.485224: step: 112/77, loss: 0.001803108025342226 2023-01-22 07:45:02.788223: step: 116/77, loss: 0.00217154948040843 2023-01-22 07:45:04.044950: step: 120/77, loss: 0.027000855654478073 2023-01-22 07:45:05.364093: step: 124/77, loss: 0.01967952400445938 2023-01-22 07:45:06.684721: step: 128/77, loss: 0.012175975367426872 2023-01-22 07:45:08.004733: step: 132/77, loss: 0.0006821623537689447 2023-01-22 07:45:09.273168: step: 136/77, loss: 0.026813946664333344 2023-01-22 07:45:10.549241: step: 140/77, loss: 0.001025137840770185 2023-01-22 07:45:11.838824: step: 144/77, loss: 0.044895920902490616 2023-01-22 07:45:13.142718: step: 148/77, loss: 0.04364461451768875 2023-01-22 07:45:14.468773: step: 152/77, loss: 0.07442065328359604 2023-01-22 07:45:15.750623: step: 156/77, loss: 0.010177352465689182 2023-01-22 07:45:17.062642: step: 160/77, loss: 0.011536704376339912 2023-01-22 07:45:18.374249: step: 164/77, loss: 0.017821375280618668 2023-01-22 07:45:19.625493: step: 168/77, loss: 0.035762540996074677 2023-01-22 07:45:20.958478: step: 172/77, loss: 0.010910563170909882 2023-01-22 07:45:22.209551: step: 176/77, loss: 0.1267044097185135 2023-01-22 07:45:23.456837: step: 180/77, loss: 0.00936646293848753 2023-01-22 07:45:24.682677: step: 184/77, loss: 0.011003161780536175 2023-01-22 07:45:25.940602: step: 188/77, loss: 0.013133707456290722 2023-01-22 07:45:27.270514: step: 192/77, loss: 0.0047271656803786755 2023-01-22 07:45:28.571292: step: 196/77, loss: 0.027341635897755623 2023-01-22 07:45:29.880993: step: 200/77, loss: 0.02294914796948433 2023-01-22 07:45:31.202275: step: 204/77, loss: 0.020474707707762718 2023-01-22 07:45:32.491239: step: 208/77, loss: 0.0029907962307333946 2023-01-22 07:45:33.805844: step: 212/77, loss: 0.04676752910017967 2023-01-22 07:45:35.098992: step: 216/77, loss: 0.020993176847696304 2023-01-22 07:45:36.434005: step: 220/77, loss: 0.0022739083506166935 2023-01-22 07:45:37.684773: step: 224/77, loss: 0.004498450551182032 2023-01-22 07:45:38.972704: step: 228/77, loss: 0.014462015591561794 2023-01-22 07:45:40.261060: step: 232/77, loss: 0.011671274900436401 2023-01-22 07:45:41.529731: step: 236/77, loss: 0.004001801833510399 2023-01-22 07:45:42.844400: step: 
240/77, loss: 0.009535029530525208 2023-01-22 07:45:44.110951: step: 244/77, loss: 0.01722276769578457 2023-01-22 07:45:45.407145: step: 248/77, loss: 0.008600283414125443 2023-01-22 07:45:46.694258: step: 252/77, loss: 0.04984879493713379 2023-01-22 07:45:48.035633: step: 256/77, loss: 0.01410811860114336 2023-01-22 07:45:49.314652: step: 260/77, loss: 0.00032312856637872756 2023-01-22 07:45:50.590733: step: 264/77, loss: 0.0010134776821359992 2023-01-22 07:45:51.878394: step: 268/77, loss: 0.0083998404443264 2023-01-22 07:45:53.171189: step: 272/77, loss: 0.01889784447848797 2023-01-22 07:45:54.472129: step: 276/77, loss: 2.0189343558740802e-05 2023-01-22 07:45:55.752665: step: 280/77, loss: 0.010106510482728481 2023-01-22 07:45:57.074753: step: 284/77, loss: 0.01588715799152851 2023-01-22 07:45:58.377132: step: 288/77, loss: 0.0029431162402033806 2023-01-22 07:45:59.670970: step: 292/77, loss: 0.03170425444841385 2023-01-22 07:46:00.969590: step: 296/77, loss: 0.011810568161308765 2023-01-22 07:46:02.285291: step: 300/77, loss: 0.016197899356484413 2023-01-22 07:46:03.578867: step: 304/77, loss: 0.01205252856016159 2023-01-22 07:46:04.872044: step: 308/77, loss: 0.0011092599015682936 2023-01-22 07:46:06.189758: step: 312/77, loss: 0.0018312877509742975 2023-01-22 07:46:07.487445: step: 316/77, loss: 0.053495053201913834 2023-01-22 07:46:08.763997: step: 320/77, loss: 0.007342047058045864 2023-01-22 07:46:10.060796: step: 324/77, loss: 0.005907184444367886 2023-01-22 07:46:11.329677: step: 328/77, loss: 0.00495109474286437 2023-01-22 07:46:12.618000: step: 332/77, loss: 0.009428643621504307 2023-01-22 07:46:13.913916: step: 336/77, loss: 0.02674906887114048 2023-01-22 07:46:15.256343: step: 340/77, loss: 0.016145536676049232 2023-01-22 07:46:16.597315: step: 344/77, loss: 0.020640820264816284 2023-01-22 07:46:17.856949: step: 348/77, loss: 0.04335368797183037 2023-01-22 07:46:19.109023: step: 352/77, loss: 0.014391623437404633 2023-01-22 07:46:20.378689: step: 356/77, loss: 8.42200024635531e-05 2023-01-22 07:46:21.707667: step: 360/77, loss: 2.42486839852063e-05 2023-01-22 07:46:23.049308: step: 364/77, loss: 0.008449288085103035 2023-01-22 07:46:24.397748: step: 368/77, loss: 0.03136660158634186 2023-01-22 07:46:25.696282: step: 372/77, loss: 0.002738418523222208 2023-01-22 07:46:26.994376: step: 376/77, loss: 0.022241732105612755 2023-01-22 07:46:28.297510: step: 380/77, loss: 0.007063223980367184 2023-01-22 07:46:29.628313: step: 384/77, loss: 0.02287823148071766 2023-01-22 07:46:30.896104: step: 388/77, loss: 0.07854347676038742 ================================================== Loss: 0.017 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Chinese: {'template': {'p': 0.9594594594594594, 'r': 0.5419847328244275, 'f1': 0.6926829268292682}, 'slot': {'p': 0.42424242424242425, 'r': 0.012646793134598013, 'f1': 0.024561403508771933}, 'combined': 0.017013264869490802, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Korean: {'template': {'p': 0.9594594594594594, 'r': 0.5419847328244275, 'f1': 0.6926829268292682}, 'slot': {'p': 0.4375, 'r': 0.012646793134598013, 'f1': 0.024582967515364356}, 'combined': 0.017028201888691405, 'epoch': 9} Dev 
Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 9} Test Russian: {'template': {'p': 0.9594594594594594, 'r': 0.5419847328244275, 'f1': 0.6926829268292682}, 'slot': {'p': 0.45454545454545453, 'r': 0.013550135501355014, 'f1': 0.02631578947368421}, 'combined': 0.018228498074454428, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 10 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:48:12.401397: step: 4/77, loss: 0.0334661565721035 2023-01-22 07:48:13.649001: step: 8/77, loss: 0.0053428830578923225 2023-01-22 07:48:14.949524: step: 12/77, loss: 0.012940764427185059 2023-01-22 07:48:16.263607: step: 16/77, loss: 0.03215727582573891 2023-01-22 07:48:17.578763: step: 20/77, loss: 0.01621071994304657 2023-01-22 07:48:18.867092: step: 24/77, loss: 
6.636667239945382e-05 2023-01-22 07:48:20.127030: step: 28/77, loss: 0.0008667551446706057 2023-01-22 07:48:21.477771: step: 32/77, loss: 0.027174003422260284 2023-01-22 07:48:22.753582: step: 36/77, loss: 0.010804384015500546 2023-01-22 07:48:24.077521: step: 40/77, loss: 0.0023620270658284426 2023-01-22 07:48:25.358296: step: 44/77, loss: 0.007145829498767853 2023-01-22 07:48:26.629353: step: 48/77, loss: 0.009835539385676384 2023-01-22 07:48:27.950947: step: 52/77, loss: 0.002078540623188019 2023-01-22 07:48:29.231689: step: 56/77, loss: 0.0011278409510850906 2023-01-22 07:48:30.526355: step: 60/77, loss: 0.019689617678523064 2023-01-22 07:48:31.746420: step: 64/77, loss: 0.02498197928071022 2023-01-22 07:48:33.059642: step: 68/77, loss: 0.0023460511583834887 2023-01-22 07:48:34.292415: step: 72/77, loss: 0.05652249976992607 2023-01-22 07:48:35.612206: step: 76/77, loss: 0.038843028247356415 2023-01-22 07:48:36.895350: step: 80/77, loss: 0.036456163972616196 2023-01-22 07:48:38.198312: step: 84/77, loss: 0.12923657894134521 2023-01-22 07:48:39.470375: step: 88/77, loss: 0.004103332292288542 2023-01-22 07:48:40.730533: step: 92/77, loss: 0.030966389924287796 2023-01-22 07:48:41.946938: step: 96/77, loss: 0.002118196338415146 2023-01-22 07:48:43.268857: step: 100/77, loss: 0.007329493761062622 2023-01-22 07:48:44.545084: step: 104/77, loss: 0.002037853468209505 2023-01-22 07:48:45.832952: step: 108/77, loss: 7.579373777844012e-05 2023-01-22 07:48:47.112660: step: 112/77, loss: 0.042532891035079956 2023-01-22 07:48:48.388008: step: 116/77, loss: 0.006116456817835569 2023-01-22 07:48:49.740828: step: 120/77, loss: 0.05537469685077667 2023-01-22 07:48:51.084082: step: 124/77, loss: 0.06468012183904648 2023-01-22 07:48:52.336389: step: 128/77, loss: 0.004855441860854626 2023-01-22 07:48:53.650339: step: 132/77, loss: 0.015735818073153496 2023-01-22 07:48:54.930597: step: 136/77, loss: 0.00012620292545761913 2023-01-22 07:48:56.196252: step: 140/77, loss: 0.007197881117463112 2023-01-22 07:48:57.540074: step: 144/77, loss: 7.069560524541885e-05 2023-01-22 07:48:58.845327: step: 148/77, loss: 0.00013055774616077542 2023-01-22 07:49:00.122877: step: 152/77, loss: 0.0017158358823508024 2023-01-22 07:49:01.378065: step: 156/77, loss: 0.034569624811410904 2023-01-22 07:49:02.644218: step: 160/77, loss: 0.0027393088676035404 2023-01-22 07:49:03.931956: step: 164/77, loss: 0.010289029218256474 2023-01-22 07:49:05.240578: step: 168/77, loss: 0.0027779859956353903 2023-01-22 07:49:06.514500: step: 172/77, loss: 0.0007916667382232845 2023-01-22 07:49:07.806159: step: 176/77, loss: 0.20995254814624786 2023-01-22 07:49:09.103235: step: 180/77, loss: 0.1505231112241745 2023-01-22 07:49:10.461504: step: 184/77, loss: 0.0010892642894759774 2023-01-22 07:49:11.763683: step: 188/77, loss: 0.012236889451742172 2023-01-22 07:49:13.079863: step: 192/77, loss: 0.005136884283274412 2023-01-22 07:49:14.362055: step: 196/77, loss: 0.03500115126371384 2023-01-22 07:49:15.622336: step: 200/77, loss: 0.024591337889432907 2023-01-22 07:49:16.918580: step: 204/77, loss: 0.018696283921599388 2023-01-22 07:49:18.188134: step: 208/77, loss: 0.010986441746354103 2023-01-22 07:49:19.518264: step: 212/77, loss: 0.08380329608917236 2023-01-22 07:49:20.825041: step: 216/77, loss: 0.02270100638270378 2023-01-22 07:49:22.088157: step: 220/77, loss: 0.00021239221678115427 2023-01-22 07:49:23.356086: step: 224/77, loss: 0.026787076145410538 2023-01-22 07:49:24.677604: step: 228/77, loss: 0.08860864490270615 2023-01-22 
07:49:26.000803: step: 232/77, loss: 0.00024117573047988117 2023-01-22 07:49:27.321964: step: 236/77, loss: 0.0002482231648173183 2023-01-22 07:49:28.664303: step: 240/77, loss: 0.00014384181122295558 2023-01-22 07:49:29.935835: step: 244/77, loss: 0.03744838014245033 2023-01-22 07:49:31.210938: step: 248/77, loss: 0.006567574106156826 2023-01-22 07:49:32.535566: step: 252/77, loss: 0.017972031608223915 2023-01-22 07:49:33.863295: step: 256/77, loss: 0.0023253969848155975 2023-01-22 07:49:35.132277: step: 260/77, loss: 0.02187388949096203 2023-01-22 07:49:36.474811: step: 264/77, loss: 0.01341575849801302 2023-01-22 07:49:37.762778: step: 268/77, loss: 0.004433206748217344 2023-01-22 07:49:39.029892: step: 272/77, loss: 0.007166629657149315 2023-01-22 07:49:40.331815: step: 276/77, loss: 0.017453910782933235 2023-01-22 07:49:41.641088: step: 280/77, loss: 0.04777868464589119 2023-01-22 07:49:42.955621: step: 284/77, loss: 0.01083751954138279 2023-01-22 07:49:44.207445: step: 288/77, loss: 0.007762902416288853 2023-01-22 07:49:45.491878: step: 292/77, loss: 0.03866489231586456 2023-01-22 07:49:46.743280: step: 296/77, loss: 0.016943011432886124 2023-01-22 07:49:48.047996: step: 300/77, loss: 0.0027338904328644276 2023-01-22 07:49:49.298638: step: 304/77, loss: 0.00044834212167188525 2023-01-22 07:49:50.547286: step: 308/77, loss: 0.15176521241664886 2023-01-22 07:49:51.818029: step: 312/77, loss: 0.0882883220911026 2023-01-22 07:49:53.137468: step: 316/77, loss: 0.04136144742369652 2023-01-22 07:49:54.441380: step: 320/77, loss: 0.03687924146652222 2023-01-22 07:49:55.692372: step: 324/77, loss: 0.0247676782310009 2023-01-22 07:49:57.009938: step: 328/77, loss: 0.004467879422008991 2023-01-22 07:49:58.323198: step: 332/77, loss: 0.08892552554607391 2023-01-22 07:49:59.595367: step: 336/77, loss: 0.043028660118579865 2023-01-22 07:50:00.901442: step: 340/77, loss: 0.004968172404915094 2023-01-22 07:50:02.164522: step: 344/77, loss: 0.0011426556156948209 2023-01-22 07:50:03.429066: step: 348/77, loss: 0.022866141051054 2023-01-22 07:50:04.726097: step: 352/77, loss: 0.02279716543853283 2023-01-22 07:50:06.031673: step: 356/77, loss: 0.08208740502595901 2023-01-22 07:50:07.353908: step: 360/77, loss: 0.0033330784644931555 2023-01-22 07:50:08.617209: step: 364/77, loss: 0.02521967515349388 2023-01-22 07:50:09.888799: step: 368/77, loss: 1.1281890692771412e-05 2023-01-22 07:50:11.179230: step: 372/77, loss: 0.005154577549546957 2023-01-22 07:50:12.507691: step: 376/77, loss: 0.0027619428001344204 2023-01-22 07:50:13.842755: step: 380/77, loss: 0.0311444029211998 2023-01-22 07:50:15.121356: step: 384/77, loss: 0.031260937452316284 2023-01-22 07:50:16.369013: step: 388/77, loss: 0.023465558886528015 ================================================== Loss: 0.025 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Chinese: {'template': {'p': 0.9857142857142858, 'r': 0.5267175572519084, 'f1': 0.6865671641791045}, 'slot': {'p': 0.5, 'r': 0.00993676603432701, 'f1': 0.01948627103631532}, 'combined': 0.013378633845828429, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Korean: {'template': {'p': 0.9855072463768116, 'r': 0.5190839694656488, 'f1': 0.68}, 
'slot': {'p': 0.5, 'r': 0.00993676603432701, 'f1': 0.01948627103631532}, 'combined': 0.01325066430469442, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Russian: {'template': {'p': 0.9857142857142858, 'r': 0.5267175572519084, 'f1': 0.6865671641791045}, 'slot': {'p': 0.4782608695652174, 'r': 0.00993676603432701, 'f1': 0.019469026548672566}, 'combined': 0.013366794346849821, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 11 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:51:57.548360: step: 4/77, loss: 0.01584513485431671 2023-01-22 07:51:58.873161: step: 8/77, loss: 0.011110379360616207 2023-01-22 07:52:00.166430: step: 12/77, loss: 0.007822764106094837 2023-01-22 
07:52:01.461651: step: 16/77, loss: 0.003719520289450884 2023-01-22 07:52:02.773870: step: 20/77, loss: 0.016872625797986984 2023-01-22 07:52:04.082903: step: 24/77, loss: 5.1765411626547575e-05 2023-01-22 07:52:05.364306: step: 28/77, loss: 0.035905055701732635 2023-01-22 07:52:06.641794: step: 32/77, loss: 0.012639195658266544 2023-01-22 07:52:07.943613: step: 36/77, loss: 0.003601525444537401 2023-01-22 07:52:09.322619: step: 40/77, loss: 0.013114494271576405 2023-01-22 07:52:10.598801: step: 44/77, loss: 0.012906393967568874 2023-01-22 07:52:11.915162: step: 48/77, loss: 0.00372703792527318 2023-01-22 07:52:13.257913: step: 52/77, loss: 0.006472621578723192 2023-01-22 07:52:14.545301: step: 56/77, loss: 8.66968184709549e-05 2023-01-22 07:52:15.820381: step: 60/77, loss: 0.0013746072072535753 2023-01-22 07:52:17.066553: step: 64/77, loss: 0.03838416934013367 2023-01-22 07:52:18.331272: step: 68/77, loss: 0.0019289760384708643 2023-01-22 07:52:19.622932: step: 72/77, loss: 0.002471780404448509 2023-01-22 07:52:20.905730: step: 76/77, loss: 0.026333516463637352 2023-01-22 07:52:22.152335: step: 80/77, loss: 0.012460384517908096 2023-01-22 07:52:23.457485: step: 84/77, loss: 0.04798956960439682 2023-01-22 07:52:24.790826: step: 88/77, loss: 0.0010338500142097473 2023-01-22 07:52:26.083504: step: 92/77, loss: 0.0032949044834822416 2023-01-22 07:52:27.389689: step: 96/77, loss: 0.016322335228323936 2023-01-22 07:52:28.691305: step: 100/77, loss: 0.0018908950733020902 2023-01-22 07:52:29.981252: step: 104/77, loss: 0.017116904258728027 2023-01-22 07:52:31.280655: step: 108/77, loss: 0.06026684492826462 2023-01-22 07:52:32.610584: step: 112/77, loss: 0.019831260666251183 2023-01-22 07:52:33.900246: step: 116/77, loss: 0.009897179901599884 2023-01-22 07:52:35.218772: step: 120/77, loss: 0.10649476200342178 2023-01-22 07:52:36.494232: step: 124/77, loss: 0.006906839553266764 2023-01-22 07:52:37.792183: step: 128/77, loss: 0.011276248842477798 2023-01-22 07:52:39.081003: step: 132/77, loss: 0.006750210653990507 2023-01-22 07:52:40.361566: step: 136/77, loss: 0.017680184915661812 2023-01-22 07:52:41.635277: step: 140/77, loss: 0.00014472060138359666 2023-01-22 07:52:42.942694: step: 144/77, loss: 0.013520104810595512 2023-01-22 07:52:44.241157: step: 148/77, loss: 0.012814250774681568 2023-01-22 07:52:45.542492: step: 152/77, loss: 0.011074498295783997 2023-01-22 07:52:46.826886: step: 156/77, loss: 0.050522495061159134 2023-01-22 07:52:48.135252: step: 160/77, loss: 0.0018916124245151877 2023-01-22 07:52:49.410687: step: 164/77, loss: 0.02445100247859955 2023-01-22 07:52:50.762285: step: 168/77, loss: 0.05349840223789215 2023-01-22 07:52:52.029976: step: 172/77, loss: 0.04895170032978058 2023-01-22 07:52:53.305968: step: 176/77, loss: 7.250769704114646e-05 2023-01-22 07:52:54.614387: step: 180/77, loss: 0.002005907241255045 2023-01-22 07:52:55.883867: step: 184/77, loss: 0.028574928641319275 2023-01-22 07:52:57.179063: step: 188/77, loss: 0.008724729530513287 2023-01-22 07:52:58.497005: step: 192/77, loss: 0.02186712622642517 2023-01-22 07:52:59.795731: step: 196/77, loss: 0.04871781915426254 2023-01-22 07:53:01.100077: step: 200/77, loss: 0.008077450096607208 2023-01-22 07:53:02.397128: step: 204/77, loss: 0.010768383741378784 2023-01-22 07:53:03.716928: step: 208/77, loss: 0.010555818676948547 2023-01-22 07:53:05.019707: step: 212/77, loss: 0.03762510418891907 2023-01-22 07:53:06.304674: step: 216/77, loss: 0.0065259141847491264 2023-01-22 07:53:07.618226: step: 220/77, loss: 
0.01817585900425911 2023-01-22 07:53:08.948649: step: 224/77, loss: 0.005752554163336754 2023-01-22 07:53:10.239523: step: 228/77, loss: 0.021643973886966705 2023-01-22 07:53:11.519107: step: 232/77, loss: 0.04298442602157593 2023-01-22 07:53:12.829961: step: 236/77, loss: 0.01501567754894495 2023-01-22 07:53:14.127107: step: 240/77, loss: 0.006192583125084639 2023-01-22 07:53:15.408483: step: 244/77, loss: 0.004610065370798111 2023-01-22 07:53:16.678414: step: 248/77, loss: 0.0006969093228690326 2023-01-22 07:53:17.965236: step: 252/77, loss: 0.07962727546691895 2023-01-22 07:53:19.241870: step: 256/77, loss: 0.01237800344824791 2023-01-22 07:53:20.576783: step: 260/77, loss: 0.027706127613782883 2023-01-22 07:53:21.864222: step: 264/77, loss: 0.00938791036605835 2023-01-22 07:53:23.153293: step: 268/77, loss: 0.024889472872018814 2023-01-22 07:53:24.420082: step: 272/77, loss: 0.04661480337381363 2023-01-22 07:53:25.669125: step: 276/77, loss: 0.0010684910230338573 2023-01-22 07:53:26.980015: step: 280/77, loss: 0.009528428316116333 2023-01-22 07:53:28.268034: step: 284/77, loss: 0.03483160585165024 2023-01-22 07:53:29.509685: step: 288/77, loss: 0.011898556724190712 2023-01-22 07:53:30.828289: step: 292/77, loss: 0.00047971383901312947 2023-01-22 07:53:32.093026: step: 296/77, loss: 0.061640918254852295 2023-01-22 07:53:33.420223: step: 300/77, loss: 0.0016488262917846441 2023-01-22 07:53:34.709685: step: 304/77, loss: 0.02181958593428135 2023-01-22 07:53:36.022184: step: 308/77, loss: 0.004883064888417721 2023-01-22 07:53:37.349797: step: 312/77, loss: 0.00666409358382225 2023-01-22 07:53:38.618205: step: 316/77, loss: 0.06552506983280182 2023-01-22 07:53:39.865328: step: 320/77, loss: 0.00029439933132380247 2023-01-22 07:53:41.174181: step: 324/77, loss: 0.0016701119020581245 2023-01-22 07:53:42.432724: step: 328/77, loss: 0.013529755175113678 2023-01-22 07:53:43.783699: step: 332/77, loss: 0.019527696073055267 2023-01-22 07:53:45.061914: step: 336/77, loss: 0.020140524953603745 2023-01-22 07:53:46.342981: step: 340/77, loss: 0.018858810886740685 2023-01-22 07:53:47.646331: step: 344/77, loss: 0.011388593353331089 2023-01-22 07:53:48.937638: step: 348/77, loss: 0.0008530689519830048 2023-01-22 07:53:50.256468: step: 352/77, loss: 0.002056154888123274 2023-01-22 07:53:51.584725: step: 356/77, loss: 0.009871527552604675 2023-01-22 07:53:52.865301: step: 360/77, loss: 0.0023899408988654613 2023-01-22 07:53:54.172011: step: 364/77, loss: 0.0003346440498717129 2023-01-22 07:53:55.470553: step: 368/77, loss: 0.0314149372279644 2023-01-22 07:53:56.768293: step: 372/77, loss: 0.005590423475950956 2023-01-22 07:53:58.046116: step: 376/77, loss: 0.005169984884560108 2023-01-22 07:53:59.361945: step: 380/77, loss: 0.03703976050019264 2023-01-22 07:54:00.697575: step: 384/77, loss: 0.0014688130468130112 2023-01-22 07:54:01.985668: step: 388/77, loss: 0.01273888349533081 ================================================== Loss: 0.017 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Chinese: {'template': {'p': 0.9259259259259259, 'r': 0.5725190839694656, 'f1': 0.7075471698113207}, 'slot': {'p': 0.4, 'r': 0.016260162601626018, 'f1': 0.03125000000000001}, 'combined': 0.022110849056603776, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Korean: {'template': {'p': 0.9240506329113924, 'r': 0.5572519083969466, 'f1': 0.6952380952380953}, 'slot': {'p': 0.38636363636363635, 'r': 0.015356820234869015, 'f1': 0.02953953084274544}, 'combined': 0.02053700715733731, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Russian: {'template': {'p': 0.9146341463414634, 'r': 0.5725190839694656, 'f1': 0.704225352112676}, 'slot': {'p': 0.3829787234042553, 'r': 0.016260162601626018, 'f1': 0.03119584055459273}, 'combined': 0.021968901799008962, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 12 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:55:43.467347: step: 4/77, loss: 
0.056859564036130905 2023-01-22 07:55:44.763699: step: 8/77, loss: 0.00018933211686089635 2023-01-22 07:55:46.054141: step: 12/77, loss: 0.0015684763202443719 2023-01-22 07:55:47.304936: step: 16/77, loss: 0.0037419775035232306 2023-01-22 07:55:48.576663: step: 20/77, loss: 0.00010413004201836884 2023-01-22 07:55:49.856236: step: 24/77, loss: 0.0036584739573299885 2023-01-22 07:55:51.142602: step: 28/77, loss: 0.006588386371731758 2023-01-22 07:55:52.411407: step: 32/77, loss: 0.012956037186086178 2023-01-22 07:55:53.743117: step: 36/77, loss: 0.05439409613609314 2023-01-22 07:55:55.067789: step: 40/77, loss: 0.08067908883094788 2023-01-22 07:55:56.386104: step: 44/77, loss: 0.028869925066828728 2023-01-22 07:55:57.653429: step: 48/77, loss: 0.0002178424911107868 2023-01-22 07:55:58.901399: step: 52/77, loss: 0.04830165579915047 2023-01-22 07:56:00.224210: step: 56/77, loss: 0.0012148318346589804 2023-01-22 07:56:01.546004: step: 60/77, loss: 0.0006402688450179994 2023-01-22 07:56:02.811823: step: 64/77, loss: 0.0008659854647703469 2023-01-22 07:56:04.102462: step: 68/77, loss: 0.03003714233636856 2023-01-22 07:56:05.410061: step: 72/77, loss: 0.02386688068509102 2023-01-22 07:56:06.701593: step: 76/77, loss: 0.042517803609371185 2023-01-22 07:56:07.905225: step: 80/77, loss: 0.0011577220866456628 2023-01-22 07:56:09.185426: step: 84/77, loss: 0.010854844003915787 2023-01-22 07:56:10.466984: step: 88/77, loss: 0.040769848972558975 2023-01-22 07:56:11.735074: step: 92/77, loss: 0.0005059882532805204 2023-01-22 07:56:13.019425: step: 96/77, loss: 0.016970816999673843 2023-01-22 07:56:14.268677: step: 100/77, loss: 0.02442902699112892 2023-01-22 07:56:15.526489: step: 104/77, loss: 0.0003359513357281685 2023-01-22 07:56:16.827498: step: 108/77, loss: 0.04959928244352341 2023-01-22 07:56:18.182330: step: 112/77, loss: 0.0037586591206490993 2023-01-22 07:56:19.518258: step: 116/77, loss: 0.015655245631933212 2023-01-22 07:56:20.855485: step: 120/77, loss: 0.007982890121638775 2023-01-22 07:56:22.136269: step: 124/77, loss: 0.02343090996146202 2023-01-22 07:56:23.432624: step: 128/77, loss: 0.007850190624594688 2023-01-22 07:56:24.720296: step: 132/77, loss: 0.0009188792901113629 2023-01-22 07:56:26.037072: step: 136/77, loss: 0.000135556620080024 2023-01-22 07:56:27.294556: step: 140/77, loss: 0.0010540832299739122 2023-01-22 07:56:28.594859: step: 144/77, loss: 0.0083523690700531 2023-01-22 07:56:29.901521: step: 148/77, loss: 0.008881242014467716 2023-01-22 07:56:31.194060: step: 152/77, loss: 0.009107242338359356 2023-01-22 07:56:32.447998: step: 156/77, loss: 0.003329481929540634 2023-01-22 07:56:33.712518: step: 160/77, loss: 0.0020151210483163595 2023-01-22 07:56:34.968489: step: 164/77, loss: 0.006416513584554195 2023-01-22 07:56:36.253554: step: 168/77, loss: 0.0237065888941288 2023-01-22 07:56:37.523960: step: 172/77, loss: 0.004142489284276962 2023-01-22 07:56:38.805613: step: 176/77, loss: 0.013825026340782642 2023-01-22 07:56:40.079168: step: 180/77, loss: 0.003928538877516985 2023-01-22 07:56:41.374717: step: 184/77, loss: 0.02468213438987732 2023-01-22 07:56:42.655186: step: 188/77, loss: 0.01477570179849863 2023-01-22 07:56:43.933747: step: 192/77, loss: 0.04188038408756256 2023-01-22 07:56:45.217583: step: 196/77, loss: 0.012145506218075752 2023-01-22 07:56:46.552427: step: 200/77, loss: 0.0013958922354504466 2023-01-22 07:56:47.852539: step: 204/77, loss: 0.01585123874247074 2023-01-22 07:56:49.158995: step: 208/77, loss: 0.012287406250834465 2023-01-22 07:56:50.430573: step: 
212/77, loss: 0.009149945341050625 2023-01-22 07:56:51.711799: step: 216/77, loss: 0.0019453726708889008 2023-01-22 07:56:53.031766: step: 220/77, loss: 0.0008188173524104059 2023-01-22 07:56:54.322520: step: 224/77, loss: 0.0005111164064146578 2023-01-22 07:56:55.650865: step: 228/77, loss: 0.0007848279201425612 2023-01-22 07:56:56.989514: step: 232/77, loss: 0.0243210569024086 2023-01-22 07:56:58.227727: step: 236/77, loss: 0.0005640236777253449 2023-01-22 07:56:59.540056: step: 240/77, loss: 0.0072174943052232265 2023-01-22 07:57:00.825640: step: 244/77, loss: 0.0010060467757284641 2023-01-22 07:57:02.069112: step: 248/77, loss: 0.01647023856639862 2023-01-22 07:57:03.353641: step: 252/77, loss: 0.012628314085304737 2023-01-22 07:57:04.667811: step: 256/77, loss: 0.0013272122014313936 2023-01-22 07:57:05.932203: step: 260/77, loss: 0.02216571941971779 2023-01-22 07:57:07.241660: step: 264/77, loss: 0.0006854430539533496 2023-01-22 07:57:08.509532: step: 268/77, loss: 0.009277165867388248 2023-01-22 07:57:09.801421: step: 272/77, loss: 0.010998756624758244 2023-01-22 07:57:11.120078: step: 276/77, loss: 0.0013080434873700142 2023-01-22 07:57:12.384223: step: 280/77, loss: 0.006445006933063269 2023-01-22 07:57:13.680388: step: 284/77, loss: 0.011819546110928059 2023-01-22 07:57:14.960411: step: 288/77, loss: 0.0968322679400444 2023-01-22 07:57:16.233464: step: 292/77, loss: 0.020905228331685066 2023-01-22 07:57:17.525728: step: 296/77, loss: 0.004556386265903711 2023-01-22 07:57:18.832942: step: 300/77, loss: 0.0011466899886727333 2023-01-22 07:57:20.178824: step: 304/77, loss: 0.03180212900042534 2023-01-22 07:57:21.476249: step: 308/77, loss: 0.0031903793569654226 2023-01-22 07:57:22.742435: step: 312/77, loss: 0.016256894916296005 2023-01-22 07:57:24.062132: step: 316/77, loss: 0.0044269743375480175 2023-01-22 07:57:25.368834: step: 320/77, loss: 0.0002046520821750164 2023-01-22 07:57:26.672314: step: 324/77, loss: 0.007200018502771854 2023-01-22 07:57:27.991248: step: 328/77, loss: 0.037378519773483276 2023-01-22 07:57:29.272268: step: 332/77, loss: 0.08489402383565903 2023-01-22 07:57:30.584226: step: 336/77, loss: 0.034434448927640915 2023-01-22 07:57:31.837876: step: 340/77, loss: 0.0072105564177036285 2023-01-22 07:57:33.177163: step: 344/77, loss: 0.00021478811686392874 2023-01-22 07:57:34.522532: step: 348/77, loss: 0.001809608656913042 2023-01-22 07:57:35.796834: step: 352/77, loss: 0.0011827481212094426 2023-01-22 07:57:37.043463: step: 356/77, loss: 0.012472348287701607 2023-01-22 07:57:38.351045: step: 360/77, loss: 0.0236879363656044 2023-01-22 07:57:39.680675: step: 364/77, loss: 0.014688440598547459 2023-01-22 07:57:40.961484: step: 368/77, loss: 0.0033161991741508245 2023-01-22 07:57:42.267037: step: 372/77, loss: 0.07218929380178452 2023-01-22 07:57:43.524426: step: 376/77, loss: 0.00468746293336153 2023-01-22 07:57:44.783160: step: 380/77, loss: 0.037831034511327744 2023-01-22 07:57:46.067682: step: 384/77, loss: 0.00019326545589137822 2023-01-22 07:57:47.404430: step: 388/77, loss: 0.0001353693223791197 ================================================== Loss: 0.015 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Chinese: {'template': {'p': 0.9726027397260274, 'r': 0.5419847328244275, 'f1': 0.696078431372549}, 'slot': {'p': 0.4375, 'r': 0.012646793134598013, 'f1': 0.024582967515364356}, 
'combined': 0.01711167346657715, 'epoch': 12} Dev Korean: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 12} Test Korean: {'template': {'p': 0.9722222222222222, 'r': 0.5343511450381679, 'f1': 0.6896551724137931}, 'slot': {'p': 0.4375, 'r': 0.012646793134598013, 'f1': 0.024582967515364356}, 'combined': 0.01695377070025128, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Russian: {'template': {'p': 0.9726027397260274, 'r': 0.5419847328244275, 'f1': 0.696078431372549}, 'slot': {'p': 0.46875, 'r': 0.013550135501355014, 'f1': 0.02633889376646181}, 'combined': 0.018333935857046946, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 13 command: python train.py --model_name template --xlmr_model_name 
xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:59:29.107140: step: 4/77, loss: 0.0008186142658814788 2023-01-22 07:59:30.460686: step: 8/77, loss: 0.004512900486588478 2023-01-22 07:59:31.729163: step: 12/77, loss: 0.008835289627313614 2023-01-22 07:59:33.011833: step: 16/77, loss: 0.19354291260242462 2023-01-22 07:59:34.323683: step: 20/77, loss: 0.0005305854137986898 2023-01-22 07:59:35.634823: step: 24/77, loss: 0.001983162248507142 2023-01-22 07:59:36.957502: step: 28/77, loss: 0.01291932538151741 2023-01-22 07:59:38.233603: step: 32/77, loss: 0.0400528721511364 2023-01-22 07:59:39.535487: step: 36/77, loss: 0.0005998695851303637 2023-01-22 07:59:40.836209: step: 40/77, loss: 0.006728531792759895 2023-01-22 07:59:42.133483: step: 44/77, loss: 0.0033883636351674795 2023-01-22 07:59:43.422892: step: 48/77, loss: 0.1282605528831482 2023-01-22 07:59:44.744003: step: 52/77, loss: 0.0037158699706196785 2023-01-22 07:59:46.042834: step: 56/77, loss: 0.009680245071649551 2023-01-22 07:59:47.292380: step: 60/77, loss: 0.014932963997125626 2023-01-22 07:59:48.569074: step: 64/77, loss: 0.0074518099427223206 2023-01-22 07:59:49.837012: step: 68/77, loss: 0.013059078715741634 2023-01-22 07:59:51.180472: step: 72/77, loss: 0.0004891576245427132 2023-01-22 07:59:52.435850: step: 76/77, loss: 0.0008808997226879001 2023-01-22 07:59:53.725151: step: 80/77, loss: 0.007496487349271774 2023-01-22 07:59:55.029919: step: 84/77, loss: 0.4473806321620941 2023-01-22 07:59:56.313357: step: 88/77, loss: 0.022166451439261436 2023-01-22 07:59:57.613481: step: 92/77, loss: 0.07151596248149872 2023-01-22 07:59:58.894184: step: 96/77, loss: 0.009233084507286549 2023-01-22 08:00:00.179121: step: 100/77, loss: 0.004044557921588421 2023-01-22 08:00:01.480613: step: 104/77, loss: 0.009267174638807774 2023-01-22 08:00:02.811864: step: 108/77, loss: 0.0015090053202584386 2023-01-22 08:00:04.120351: step: 112/77, loss: 0.00926131196320057 2023-01-22 08:00:05.425573: step: 116/77, loss: 0.00021723141253460199 2023-01-22 08:00:06.684020: step: 120/77, loss: 0.01728326641023159 2023-01-22 08:00:07.914990: step: 124/77, loss: 0.0654454305768013 2023-01-22 08:00:09.183843: step: 128/77, loss: 0.015401189215481281 2023-01-22 08:00:10.426077: step: 132/77, loss: 0.0012254157336428761 2023-01-22 08:00:11.726998: step: 136/77, loss: 0.00106145441532135 2023-01-22 08:00:13.029849: step: 140/77, loss: 0.0007606762228533626 2023-01-22 08:00:14.350634: step: 144/77, loss: 0.00244477903470397 2023-01-22 08:00:15.585233: step: 148/77, loss: 0.040618762373924255 2023-01-22 08:00:16.872139: step: 152/77, loss: 0.002180933952331543 2023-01-22 08:00:18.164330: step: 156/77, loss: 0.00010296700929757208 2023-01-22 08:00:19.460451: step: 160/77, loss: 0.009789688512682915 2023-01-22 08:00:20.764454: step: 164/77, loss: 5.17836851940956e-05 2023-01-22 08:00:22.080996: step: 168/77, loss: 0.0018696163315325975 2023-01-22 08:00:23.394319: step: 172/77, loss: 0.002193837659433484 2023-01-22 08:00:24.660833: step: 176/77, loss: 0.050133366137742996 2023-01-22 08:00:25.926458: step: 180/77, loss: 0.008347946219146252 2023-01-22 08:00:27.225883: step: 184/77, loss: 0.0014724781503900886 2023-01-22 08:00:28.555627: step: 188/77, loss: 1.9858744053635746e-05 2023-01-22 08:00:29.893987: step: 192/77, loss: 0.04367201402783394 2023-01-22 08:00:31.197292: step: 196/77, loss: 0.007622504606842995 2023-01-22 08:00:32.519759: step: 
200/77, loss: 0.01916610449552536 2023-01-22 08:00:33.848970: step: 204/77, loss: 0.0003585350641515106 2023-01-22 08:00:35.152204: step: 208/77, loss: 0.0012512040557339787 2023-01-22 08:00:36.424781: step: 212/77, loss: 0.004894533194601536 2023-01-22 08:00:37.685048: step: 216/77, loss: 0.01604030281305313 2023-01-22 08:00:39.008924: step: 220/77, loss: 0.004095867276191711 2023-01-22 08:00:40.295211: step: 224/77, loss: 1.4710599316458683e-05 2023-01-22 08:00:41.569472: step: 228/77, loss: 0.020325489342212677 2023-01-22 08:00:42.846688: step: 232/77, loss: 0.0009979141177609563 2023-01-22 08:00:44.104738: step: 236/77, loss: 0.01780639961361885 2023-01-22 08:00:45.411009: step: 240/77, loss: 0.003619758877903223 2023-01-22 08:00:46.702520: step: 244/77, loss: 0.0006508044898509979 2023-01-22 08:00:48.062484: step: 248/77, loss: 0.0022863191552460194 2023-01-22 08:00:49.342810: step: 252/77, loss: 0.007784360088407993 2023-01-22 08:00:50.629124: step: 256/77, loss: 0.008496375754475594 2023-01-22 08:00:51.927645: step: 260/77, loss: 0.00044161375262774527 2023-01-22 08:00:53.226062: step: 264/77, loss: 0.009069936349987984 2023-01-22 08:00:54.536372: step: 268/77, loss: 0.0013018647441640496 2023-01-22 08:00:55.863551: step: 272/77, loss: 0.0032529844902455807 2023-01-22 08:00:57.176229: step: 276/77, loss: 0.008116625249385834 2023-01-22 08:00:58.463797: step: 280/77, loss: 0.00029886772972531617 2023-01-22 08:00:59.761716: step: 284/77, loss: 0.001512476010248065 2023-01-22 08:01:01.037719: step: 288/77, loss: 0.004851474426686764 2023-01-22 08:01:02.333978: step: 292/77, loss: 0.0005368001293390989 2023-01-22 08:01:03.620778: step: 296/77, loss: 0.04036924988031387 2023-01-22 08:01:04.942135: step: 300/77, loss: 0.005227272864431143 2023-01-22 08:01:06.234604: step: 304/77, loss: 0.004941250197589397 2023-01-22 08:01:07.492422: step: 308/77, loss: 0.014837159775197506 2023-01-22 08:01:08.770681: step: 312/77, loss: 0.027678687125444412 2023-01-22 08:01:10.039172: step: 316/77, loss: 0.0037269440945237875 2023-01-22 08:01:11.330467: step: 320/77, loss: 0.016393287107348442 2023-01-22 08:01:12.623475: step: 324/77, loss: 0.0006741977413184941 2023-01-22 08:01:13.967213: step: 328/77, loss: 0.004779040813446045 2023-01-22 08:01:15.266166: step: 332/77, loss: 0.04116032272577286 2023-01-22 08:01:16.598193: step: 336/77, loss: 0.006824977695941925 2023-01-22 08:01:17.894676: step: 340/77, loss: 0.01795623078942299 2023-01-22 08:01:19.257031: step: 344/77, loss: 0.00019160851661581546 2023-01-22 08:01:20.589221: step: 348/77, loss: 0.00015201720816548914 2023-01-22 08:01:21.847892: step: 352/77, loss: 0.0006054152618162334 2023-01-22 08:01:23.177156: step: 356/77, loss: 0.025011781603097916 2023-01-22 08:01:24.507787: step: 360/77, loss: 0.00023869529832154512 2023-01-22 08:01:25.790961: step: 364/77, loss: 0.04313571751117706 2023-01-22 08:01:27.092469: step: 368/77, loss: 3.720049426192418e-05 2023-01-22 08:01:28.365393: step: 372/77, loss: 1.5758212612126954e-05 2023-01-22 08:01:29.624427: step: 376/77, loss: 0.038922298699617386 2023-01-22 08:01:30.919847: step: 380/77, loss: 0.00173748389352113 2023-01-22 08:01:32.162961: step: 384/77, loss: 0.0016290738712996244 2023-01-22 08:01:33.494373: step: 388/77, loss: 0.0012468572240322828 ================================================== Loss: 0.018 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 
'combined': 0.05179909351586346, 'epoch': 13} Test Chinese: {'template': {'p': 0.9710144927536232, 'r': 0.5114503816793893, 'f1': 0.6699999999999999}, 'slot': {'p': 0.42857142857142855, 'r': 0.013550135501355014, 'f1': 0.02626970227670753}, 'combined': 0.017600700525394045, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Korean: {'template': {'p': 0.9714285714285714, 'r': 0.5190839694656488, 'f1': 0.6766169154228854}, 'slot': {'p': 0.4444444444444444, 'r': 0.014453477868112014, 'f1': 0.027996500437445317}, 'combined': 0.018942905768619712, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 13} Test Russian: {'template': {'p': 0.9705882352941176, 'r': 0.5038167938931297, 'f1': 0.6633165829145728}, 'slot': {'p': 0.4, 'r': 0.012646793134598013, 'f1': 0.0245183887915937}, 'combined': 0.016263453871810895, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 13} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 
0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 14 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:03:15.228975: step: 4/77, loss: 0.0035647323820739985 2023-01-22 08:03:16.512845: step: 8/77, loss: 0.0007356871501542628 2023-01-22 08:03:17.782269: step: 12/77, loss: 0.0089479461312294 2023-01-22 08:03:19.090445: step: 16/77, loss: 0.006250545848160982 2023-01-22 08:03:20.352159: step: 20/77, loss: 0.0007717355620115995 2023-01-22 08:03:21.678285: step: 24/77, loss: 0.010335363447666168 2023-01-22 08:03:22.948736: step: 28/77, loss: 0.002224587369710207 2023-01-22 08:03:24.239061: step: 32/77, loss: 0.0004021697968710214 2023-01-22 08:03:25.531270: step: 36/77, loss: 0.002917732112109661 2023-01-22 08:03:26.831076: step: 40/77, loss: 0.022275084629654884 2023-01-22 08:03:28.101098: step: 44/77, loss: 0.01664186269044876 2023-01-22 08:03:29.410476: step: 48/77, loss: 0.0020927239675074816 2023-01-22 08:03:30.667983: step: 52/77, loss: 0.0007523642270825803 2023-01-22 08:03:31.950655: step: 56/77, loss: 0.024307558313012123 2023-01-22 08:03:33.264585: step: 60/77, loss: 0.003226712578907609 2023-01-22 08:03:34.555636: step: 64/77, loss: 0.008963115513324738 2023-01-22 08:03:35.806876: step: 68/77, loss: 0.010883467271924019 2023-01-22 08:03:37.063495: step: 72/77, loss: 0.00818499457091093 2023-01-22 08:03:38.381745: step: 76/77, loss: 0.004292977973818779 2023-01-22 08:03:39.635595: step: 80/77, loss: 0.016569361090660095 2023-01-22 08:03:40.911110: step: 84/77, loss: 7.611776527483016e-05 2023-01-22 08:03:42.214349: step: 88/77, loss: 0.0011977230897173285 2023-01-22 08:03:43.526502: step: 92/77, loss: 0.0015111572574824095 2023-01-22 08:03:44.860326: step: 96/77, loss: 0.0011353702284395695 2023-01-22 08:03:46.175054: step: 100/77, loss: 0.033886488527059555 2023-01-22 08:03:47.474047: step: 104/77, loss: 0.01745476759970188 2023-01-22 08:03:48.687394: step: 108/77, loss: 0.006723622791469097 2023-01-22 08:03:49.996442: step: 112/77, loss: 0.0001270908396691084 2023-01-22 08:03:51.301075: step: 116/77, loss: 0.0004910130519419909 2023-01-22 08:03:52.611947: step: 120/77, loss: 1.6201971448026597e-05 2023-01-22 08:03:53.906514: step: 124/77, loss: 0.0003250141453463584 2023-01-22 08:03:55.174471: step: 128/77, loss: 0.00020061932445969433 2023-01-22 08:03:56.430450: step: 132/77, loss: 0.00979650765657425 2023-01-22 08:03:57.756360: step: 136/77, loss: 0.0642743855714798 2023-01-22 08:03:59.042207: step: 140/77, loss: 0.00012315556523390114 2023-01-22 08:04:00.347552: step: 144/77, loss: 0.03417303040623665 2023-01-22 08:04:01.652868: step: 148/77, loss: 0.004369811620563269 2023-01-22 08:04:02.903960: step: 152/77, loss: 0.00010004561045207083 2023-01-22 08:04:04.152286: step: 156/77, loss: 0.006763361394405365 2023-01-22 08:04:05.458678: step: 160/77, loss: 0.00010431784903630614 2023-01-22 08:04:06.765029: step: 164/77, loss: 0.019510159268975258 2023-01-22 08:04:08.056969: step: 168/77, loss: 0.0006070785457268357 2023-01-22 08:04:09.355926: step: 172/77, loss: 9.138509631156921e-05 2023-01-22 08:04:10.643059: step: 176/77, loss: 0.003176189260557294 2023-01-22 08:04:11.951067: step: 180/77, loss: 0.00040366995381191373 2023-01-22 08:04:13.255823: step: 184/77, loss: 
0.028564022853970528 2023-01-22 08:04:14.523468: step: 188/77, loss: 0.060488320887088776 2023-01-22 08:04:15.872013: step: 192/77, loss: 0.0003345083969179541 2023-01-22 08:04:17.149764: step: 196/77, loss: 0.011636440642178059 2023-01-22 08:04:18.400421: step: 200/77, loss: 0.005325679667294025 2023-01-22 08:04:19.653366: step: 204/77, loss: 0.00031878415029495955 2023-01-22 08:04:20.917689: step: 208/77, loss: 0.07789537310600281 2023-01-22 08:04:22.221124: step: 212/77, loss: 6.297907384578139e-05 2023-01-22 08:04:23.508725: step: 216/77, loss: 0.010719101876020432 2023-01-22 08:04:24.775576: step: 220/77, loss: 0.012533249333500862 2023-01-22 08:04:26.041778: step: 224/77, loss: 0.0012144611682742834 2023-01-22 08:04:27.325923: step: 228/77, loss: 0.0013060077326372266 2023-01-22 08:04:28.579196: step: 232/77, loss: 0.11131002008914948 2023-01-22 08:04:29.846531: step: 236/77, loss: 0.0572662279009819 2023-01-22 08:04:31.126303: step: 240/77, loss: 0.005486391019076109 2023-01-22 08:04:32.452926: step: 244/77, loss: 0.001085981959477067 2023-01-22 08:04:33.768139: step: 248/77, loss: 0.005080692004412413 2023-01-22 08:04:35.096830: step: 252/77, loss: 0.029977142810821533 2023-01-22 08:04:36.428022: step: 256/77, loss: 0.04266020283102989 2023-01-22 08:04:37.664834: step: 260/77, loss: 0.0010956295300275087 2023-01-22 08:04:38.947140: step: 264/77, loss: 0.0037063530180603266 2023-01-22 08:04:40.280671: step: 268/77, loss: 0.005354553461074829 2023-01-22 08:04:41.592844: step: 272/77, loss: 0.015287532471120358 2023-01-22 08:04:42.857732: step: 276/77, loss: 0.02026659995317459 2023-01-22 08:04:44.183301: step: 280/77, loss: 0.004163270350545645 2023-01-22 08:04:45.508866: step: 284/77, loss: 0.010022885166108608 2023-01-22 08:04:46.780949: step: 288/77, loss: 0.02518465742468834 2023-01-22 08:04:48.081646: step: 292/77, loss: 0.08042170852422714 2023-01-22 08:04:49.329763: step: 296/77, loss: 5.863496699021198e-05 2023-01-22 08:04:50.594161: step: 300/77, loss: 0.00013342482270672917 2023-01-22 08:04:51.885011: step: 304/77, loss: 0.0158902145922184 2023-01-22 08:04:53.246842: step: 308/77, loss: 0.024475159123539925 2023-01-22 08:04:54.498364: step: 312/77, loss: 0.0015556125435978174 2023-01-22 08:04:55.776957: step: 316/77, loss: 9.906681225402281e-05 2023-01-22 08:04:57.083780: step: 320/77, loss: 5.0956117775058374e-05 2023-01-22 08:04:58.385554: step: 324/77, loss: 0.0007658183458261192 2023-01-22 08:04:59.712202: step: 328/77, loss: 2.353433046664577e-05 2023-01-22 08:05:00.962019: step: 332/77, loss: 8.520779374521226e-05 2023-01-22 08:05:02.246840: step: 336/77, loss: 0.08126899600028992 2023-01-22 08:05:03.506956: step: 340/77, loss: 0.006573853548616171 2023-01-22 08:05:04.785977: step: 344/77, loss: 0.03134193271398544 2023-01-22 08:05:06.066949: step: 348/77, loss: 0.01700267754495144 2023-01-22 08:05:07.408243: step: 352/77, loss: 0.00022600368538405746 2023-01-22 08:05:08.745369: step: 356/77, loss: 0.020432688295841217 2023-01-22 08:05:10.015766: step: 360/77, loss: 0.000203250179765746 2023-01-22 08:05:11.344216: step: 364/77, loss: 0.05017153173685074 2023-01-22 08:05:12.659907: step: 368/77, loss: 0.007926249876618385 2023-01-22 08:05:13.938800: step: 372/77, loss: 0.0001213687501149252 2023-01-22 08:05:15.227643: step: 376/77, loss: 0.012438084930181503 2023-01-22 08:05:16.496107: step: 380/77, loss: 0.04841241613030434 2023-01-22 08:05:17.783320: step: 384/77, loss: 0.0060639334842562675 2023-01-22 08:05:19.042056: step: 388/77, loss: 0.00711484719067812 
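The per-language evaluation blocks printed after each epoch report template and slot precision/recall/F1 together with a 'combined' score. The logged values are consistent with the usual F1 definition and with 'combined' being the product of the template F1 and the slot F1; the short sketch below only reproduces that arithmetic for the epoch-14 Dev figures and is an assumption inferred from the logged numbers, not code taken from train.py (the helper name f1 is illustrative).

# Sketch of how the logged scores appear to relate (assumed from the numbers, not from train.py).
def f1(p, r):
    # Standard harmonic mean of precision and recall.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

template_f1 = f1(1.0, 0.5833333333333334)   # -> 0.7368421052631579 (matches the Dev 'template' f1)
slot_f1 = f1(0.5, 0.03780718336483932)      # -> 0.07029876977152899 (matches the Dev 'slot' f1)
combined = template_f1 * slot_f1            # -> 0.05179909351586346 (matches the Dev 'combined' value)
print(template_f1, slot_f1, combined)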
================================================== Loss: 0.014 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Chinese: {'template': {'p': 0.9558823529411765, 'r': 0.4961832061068702, 'f1': 0.6532663316582915}, 'slot': {'p': 0.5, 'r': 0.014453477868112014, 'f1': 0.028094820017559263}, 'combined': 0.018353400011470875, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Korean: {'template': {'p': 0.9402985074626866, 'r': 0.48091603053435117, 'f1': 0.6363636363636365}, 'slot': {'p': 0.48484848484848486, 'r': 0.014453477868112014, 'f1': 0.02807017543859649}, 'combined': 0.017862838915470497, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 14} Test Russian: {'template': {'p': 0.9411764705882353, 'r': 0.48854961832061067, 'f1': 0.6432160804020101}, 'slot': {'p': 0.47058823529411764, 'r': 0.014453477868112014, 'f1': 0.028045574057843997}, 'combined': 0.018039364218110712, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 
0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 15 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:07:00.302177: step: 4/77, loss: 0.0002984795719385147 2023-01-22 08:07:01.587132: step: 8/77, loss: 5.2180635975673795e-05 2023-01-22 08:07:02.868522: step: 12/77, loss: 0.013850021176040173 2023-01-22 08:07:04.145414: step: 16/77, loss: 0.04257480800151825 2023-01-22 08:07:05.450610: step: 20/77, loss: 0.0013392434921115637 2023-01-22 08:07:06.758059: step: 24/77, loss: 0.014045661315321922 2023-01-22 08:07:08.071038: step: 28/77, loss: 0.001623183605261147 2023-01-22 08:07:09.312487: step: 32/77, loss: 0.004262380767613649 2023-01-22 08:07:10.627261: step: 36/77, loss: 0.03393692150712013 2023-01-22 08:07:11.822552: step: 40/77, loss: 0.0009387597674503922 2023-01-22 08:07:13.138367: step: 44/77, loss: 0.03857859969139099 2023-01-22 08:07:14.429731: step: 48/77, loss: 0.014586037024855614 2023-01-22 08:07:15.756375: step: 52/77, loss: 0.019439885392785072 2023-01-22 08:07:17.090374: step: 56/77, loss: 0.0005992836668156087 2023-01-22 08:07:18.352285: step: 60/77, loss: 0.00041433415026403964 2023-01-22 08:07:19.636240: step: 64/77, loss: 0.0048797884956002235 2023-01-22 08:07:20.893960: step: 68/77, loss: 0.0019731195643544197 2023-01-22 08:07:22.158812: step: 72/77, loss: 0.01246714685112238 2023-01-22 08:07:23.446478: step: 76/77, loss: 7.696136890444905e-05 2023-01-22 08:07:24.710596: step: 80/77, loss: 0.001765516703017056 2023-01-22 08:07:25.988807: step: 84/77, loss: 0.0006175404414534569 2023-01-22 08:07:27.337182: step: 88/77, loss: 1.5016041288617998e-05 2023-01-22 08:07:28.643057: step: 92/77, loss: 0.006689623463898897 2023-01-22 08:07:29.937154: step: 96/77, loss: 0.0002577479463070631 2023-01-22 08:07:31.239298: step: 100/77, loss: 0.0027033966034650803 2023-01-22 08:07:32.562823: step: 104/77, loss: 0.001462356187403202 2023-01-22 08:07:33.906840: step: 108/77, loss: 0.01454481203109026 2023-01-22 08:07:35.153393: step: 112/77, loss: 0.0029345231596380472 2023-01-22 08:07:36.454818: step: 116/77, loss: 0.009179867804050446 2023-01-22 08:07:37.772391: step: 120/77, loss: 0.00047651416389271617 2023-01-22 08:07:39.042465: step: 124/77, loss: 0.08397921919822693 2023-01-22 08:07:40.346316: step: 128/77, loss: 0.005399615503847599 2023-01-22 08:07:41.592321: step: 132/77, loss: 0.008599193766713142 2023-01-22 08:07:42.902917: step: 136/77, loss: 0.009753820486366749 2023-01-22 08:07:44.217597: step: 140/77, loss: 0.04863092303276062 2023-01-22 08:07:45.545213: step: 144/77, loss: 0.00037321558920666575 2023-01-22 08:07:46.817486: step: 148/77, loss: 0.00017475412460044026 2023-01-22 08:07:48.112138: step: 152/77, loss: 6.773445784347132e-05 2023-01-22 08:07:49.366486: step: 156/77, loss: 0.00019403685291763395 2023-01-22 08:07:50.743898: step: 160/77, loss: 6.706234125886112e-05 2023-01-22 08:07:51.998710: step: 164/77, loss: 0.0005820175283588469 2023-01-22 08:07:53.269544: step: 168/77, loss: 1.0372894394095056e-05 2023-01-22 
08:07:54.529379: step: 172/77, loss: 0.00033901131246238947 2023-01-22 08:07:55.831712: step: 176/77, loss: 0.00014996797835920006 2023-01-22 08:07:57.109794: step: 180/77, loss: 0.06107831001281738 2023-01-22 08:07:58.367408: step: 184/77, loss: 0.004911118187010288 2023-01-22 08:07:59.622520: step: 188/77, loss: 0.002654495183378458 2023-01-22 08:08:00.944745: step: 192/77, loss: 0.024621155112981796 2023-01-22 08:08:02.268727: step: 196/77, loss: 2.0832656446145847e-05 2023-01-22 08:08:03.579406: step: 200/77, loss: 0.04934433847665787 2023-01-22 08:08:04.855879: step: 204/77, loss: 0.0053009772673249245 2023-01-22 08:08:06.138399: step: 208/77, loss: 0.00010297299013473094 2023-01-22 08:08:07.454942: step: 212/77, loss: 1.9658375094877556e-05 2023-01-22 08:08:08.727735: step: 216/77, loss: 0.003173955949023366 2023-01-22 08:08:09.997715: step: 220/77, loss: 1.160690499091288e-05 2023-01-22 08:08:11.337398: step: 224/77, loss: 0.009171406738460064 2023-01-22 08:08:12.649382: step: 228/77, loss: 0.016685236245393753 2023-01-22 08:08:13.951031: step: 232/77, loss: 0.0008765912498347461 2023-01-22 08:08:15.262883: step: 236/77, loss: 0.009719951078295708 2023-01-22 08:08:16.486836: step: 240/77, loss: 0.05772913247346878 2023-01-22 08:08:17.793197: step: 244/77, loss: 6.63778992020525e-05 2023-01-22 08:08:19.050741: step: 248/77, loss: 0.007578597869724035 2023-01-22 08:08:20.451174: step: 252/77, loss: 0.0036200552713125944 2023-01-22 08:08:21.684755: step: 256/77, loss: 0.013005263172090054 2023-01-22 08:08:23.003953: step: 260/77, loss: 0.03981100022792816 2023-01-22 08:08:24.251047: step: 264/77, loss: 0.014181100763380527 2023-01-22 08:08:25.573729: step: 268/77, loss: 0.018954459577798843 2023-01-22 08:08:26.846915: step: 272/77, loss: 0.00011651184468064457 2023-01-22 08:08:28.137333: step: 276/77, loss: 0.05421111360192299 2023-01-22 08:08:29.430960: step: 280/77, loss: 0.00025143398670479655 2023-01-22 08:08:30.702245: step: 284/77, loss: 0.0014813337475061417 2023-01-22 08:08:31.973008: step: 288/77, loss: 4.686432657763362e-05 2023-01-22 08:08:33.277613: step: 292/77, loss: 0.031259432435035706 2023-01-22 08:08:34.614501: step: 296/77, loss: 0.03233766555786133 2023-01-22 08:08:35.854199: step: 300/77, loss: 2.2500371414935216e-06 2023-01-22 08:08:37.149756: step: 304/77, loss: 0.00013911757559981197 2023-01-22 08:08:38.459435: step: 308/77, loss: 0.000564001442398876 2023-01-22 08:08:39.767926: step: 312/77, loss: 2.8925347578478977e-05 2023-01-22 08:08:41.058996: step: 316/77, loss: 0.002963209990411997 2023-01-22 08:08:42.346582: step: 320/77, loss: 7.018641917966306e-05 2023-01-22 08:08:43.652095: step: 324/77, loss: 0.0008802684023976326 2023-01-22 08:08:44.988417: step: 328/77, loss: 0.0026326999068260193 2023-01-22 08:08:46.300676: step: 332/77, loss: 0.03058941289782524 2023-01-22 08:08:47.639901: step: 336/77, loss: 1.5420877389260568e-05 2023-01-22 08:08:48.909884: step: 340/77, loss: 0.05822722986340523 2023-01-22 08:08:50.211528: step: 344/77, loss: 0.009377855807542801 2023-01-22 08:08:51.461999: step: 348/77, loss: 0.05395232513546944 2023-01-22 08:08:52.819983: step: 352/77, loss: 0.007040033116936684 2023-01-22 08:08:54.165828: step: 356/77, loss: 0.00036163668846711516 2023-01-22 08:08:55.486665: step: 360/77, loss: 0.00044713931856676936 2023-01-22 08:08:56.743385: step: 364/77, loss: 0.009938497096300125 2023-01-22 08:08:58.023883: step: 368/77, loss: 0.0006480221054516733 2023-01-22 08:08:59.323593: step: 372/77, loss: 0.008452756330370903 2023-01-22 
08:09:00.612976: step: 376/77, loss: 0.021464571356773376 2023-01-22 08:09:01.862902: step: 380/77, loss: 0.00037464150227606297 2023-01-22 08:09:03.148222: step: 384/77, loss: 0.005581353325396776 2023-01-22 08:09:04.431124: step: 388/77, loss: 0.003710019402205944 ================================================== Loss: 0.011 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Chinese: {'template': {'p': 0.9701492537313433, 'r': 0.4961832061068702, 'f1': 0.6565656565656566}, 'slot': {'p': 0.48148148148148145, 'r': 0.011743450767841012, 'f1': 0.022927689594356263}, 'combined': 0.015053533572052092, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.48091603053435117, 'f1': 0.6428571428571428}, 'slot': {'p': 0.48148148148148145, 'r': 0.011743450767841012, 'f1': 0.022927689594356263}, 'combined': 0.01473922902494331, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 15} Test Russian: {'template': {'p': 0.9848484848484849, 'r': 0.4961832061068702, 'f1': 0.6598984771573604}, 'slot': {'p': 0.48148148148148145, 'r': 0.011743450767841012, 'f1': 0.022927689594356263}, 'combined': 0.015129947448052355, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 
1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 16 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:10:46.044216: step: 4/77, loss: 0.0004463110235519707 2023-01-22 08:10:47.336166: step: 8/77, loss: 0.024274805560708046 2023-01-22 08:10:48.633827: step: 12/77, loss: 0.00424988241866231 2023-01-22 08:10:49.919092: step: 16/77, loss: 2.180452247557696e-05 2023-01-22 08:10:51.187148: step: 20/77, loss: 0.016084343194961548 2023-01-22 08:10:52.459769: step: 24/77, loss: 0.004575492814183235 2023-01-22 08:10:53.796556: step: 28/77, loss: 0.0007353266701102257 2023-01-22 08:10:55.048862: step: 32/77, loss: 1.3090648280922323e-05 2023-01-22 08:10:56.297224: step: 36/77, loss: 0.04062986001372337 2023-01-22 08:10:57.573914: step: 40/77, loss: 0.00453605130314827 2023-01-22 08:10:58.827876: step: 44/77, loss: 7.663781616429333e-06 2023-01-22 08:11:00.085859: step: 48/77, loss: 7.687980541959405e-06 2023-01-22 08:11:01.397588: step: 52/77, loss: 1.1422414900152944e-05 2023-01-22 08:11:02.652919: step: 56/77, loss: 0.01101649273186922 2023-01-22 08:11:03.936549: step: 60/77, loss: 0.00185338722076267 2023-01-22 08:11:05.240459: step: 64/77, loss: 0.005147712305188179 2023-01-22 08:11:06.579898: step: 68/77, loss: 0.02915601246058941 2023-01-22 08:11:07.861772: step: 72/77, loss: 0.0007723210146650672 2023-01-22 08:11:09.143390: step: 76/77, loss: 0.023317718878388405 2023-01-22 08:11:10.466141: step: 80/77, loss: 0.0010713314404711127 2023-01-22 08:11:11.777694: step: 84/77, loss: 0.00702679855749011 2023-01-22 08:11:13.048212: step: 88/77, loss: 0.00013156705244909972 2023-01-22 08:11:14.366858: step: 92/77, loss: 0.0008226665668189526 2023-01-22 08:11:15.632041: step: 96/77, loss: 0.003145791357383132 2023-01-22 08:11:16.855539: step: 100/77, loss: 0.0030092468950897455 2023-01-22 08:11:18.110029: step: 104/77, loss: 3.409211331018014e-06 2023-01-22 08:11:19.398959: step: 108/77, loss: 0.00950823538005352 2023-01-22 08:11:20.694078: step: 112/77, loss: 9.047974890563637e-05 2023-01-22 08:11:22.003915: step: 116/77, loss: 0.00112790591083467 2023-01-22 08:11:23.291414: step: 120/77, loss: 0.00042730997665785253 2023-01-22 08:11:24.590371: step: 124/77, loss: 0.04910646751523018 2023-01-22 08:11:25.895312: step: 128/77, loss: 0.0008218835573643446 2023-01-22 08:11:27.238814: step: 132/77, loss: 0.0006013006786815822 2023-01-22 08:11:28.513202: step: 136/77, loss: 0.012114128097891808 2023-01-22 08:11:29.853259: step: 140/77, loss: 1.411896118952427e-05 2023-01-22 08:11:31.200851: step: 144/77, loss: 0.06364770233631134 2023-01-22 08:11:32.483465: step: 148/77, loss: 0.002993387635797262 2023-01-22 08:11:33.723388: step: 152/77, loss: 2.0418836356839165e-05 2023-01-22 08:11:35.010622: step: 156/77, loss: 0.008779329247772694 2023-01-22 
08:11:36.338254: step: 160/77, loss: 0.00208136229775846 2023-01-22 08:11:37.667812: step: 164/77, loss: 0.00047932719462551177 2023-01-22 08:11:38.947919: step: 168/77, loss: 0.025327226147055626 2023-01-22 08:11:40.223952: step: 172/77, loss: 0.007658546324819326 2023-01-22 08:11:41.507110: step: 176/77, loss: 0.003932743798941374 2023-01-22 08:11:42.803839: step: 180/77, loss: 0.0012501177843660116 2023-01-22 08:11:44.094548: step: 184/77, loss: 0.004465624690055847 2023-01-22 08:11:45.375122: step: 188/77, loss: 0.006403674371540546 2023-01-22 08:11:46.644463: step: 192/77, loss: 0.013701226562261581 2023-01-22 08:11:47.848791: step: 196/77, loss: 0.0032587507739663124 2023-01-22 08:11:49.143938: step: 200/77, loss: 0.01083880104124546 2023-01-22 08:11:50.481451: step: 204/77, loss: 0.0682225227355957 2023-01-22 08:11:51.800789: step: 208/77, loss: 0.004877833183854818 2023-01-22 08:11:53.084114: step: 212/77, loss: 0.03905593231320381 2023-01-22 08:11:54.374295: step: 216/77, loss: 0.02148747816681862 2023-01-22 08:11:55.657683: step: 220/77, loss: 0.004947171080857515 2023-01-22 08:11:56.974050: step: 224/77, loss: 4.1722978494362906e-07 2023-01-22 08:11:58.285373: step: 228/77, loss: 0.00029864057432860136 2023-01-22 08:11:59.586649: step: 232/77, loss: 0.02274949662387371 2023-01-22 08:12:00.849138: step: 236/77, loss: 0.00946321152150631 2023-01-22 08:12:02.102222: step: 240/77, loss: 0.14353521168231964 2023-01-22 08:12:03.375345: step: 244/77, loss: 0.00017940827819984406 2023-01-22 08:12:04.672908: step: 248/77, loss: 1.54062745423289e-05 2023-01-22 08:12:05.992952: step: 252/77, loss: 2.4544437110307626e-05 2023-01-22 08:12:07.294264: step: 256/77, loss: 0.003466332098469138 2023-01-22 08:12:08.627691: step: 260/77, loss: 0.10752370208501816 2023-01-22 08:12:09.943447: step: 264/77, loss: 0.03569445386528969 2023-01-22 08:12:11.242383: step: 268/77, loss: 1.0468648724781815e-05 2023-01-22 08:12:12.529796: step: 272/77, loss: 0.028262851759791374 2023-01-22 08:12:13.839166: step: 276/77, loss: 0.00022459420142695308 2023-01-22 08:12:15.157594: step: 280/77, loss: 0.013898893259465694 2023-01-22 08:12:16.503071: step: 284/77, loss: 0.007969505153596401 2023-01-22 08:12:17.835320: step: 288/77, loss: 2.6130308469873853e-05 2023-01-22 08:12:19.148234: step: 292/77, loss: 4.822365372092463e-05 2023-01-22 08:12:20.470907: step: 296/77, loss: 0.002555176615715027 2023-01-22 08:12:21.749307: step: 300/77, loss: 5.701354530174285e-05 2023-01-22 08:12:23.059670: step: 304/77, loss: 0.005560105200856924 2023-01-22 08:12:24.331591: step: 308/77, loss: 0.04731611907482147 2023-01-22 08:12:25.585293: step: 312/77, loss: 7.124312833184376e-05 2023-01-22 08:12:26.877808: step: 316/77, loss: 0.0011542986612766981 2023-01-22 08:12:28.183314: step: 320/77, loss: 3.0384167985175736e-05 2023-01-22 08:12:29.399803: step: 324/77, loss: 0.002147078514099121 2023-01-22 08:12:30.692976: step: 328/77, loss: 0.04028966650366783 2023-01-22 08:12:32.044875: step: 332/77, loss: 3.333506901981309e-05 2023-01-22 08:12:33.345382: step: 336/77, loss: 0.0029658996500074863 2023-01-22 08:12:34.656742: step: 340/77, loss: 0.003935584332793951 2023-01-22 08:12:35.907230: step: 344/77, loss: 0.014070598408579826 2023-01-22 08:12:37.147013: step: 348/77, loss: 0.0002728747494984418 2023-01-22 08:12:38.427471: step: 352/77, loss: 0.04345415160059929 2023-01-22 08:12:39.734451: step: 356/77, loss: 0.011620284989476204 2023-01-22 08:12:41.052804: step: 360/77, loss: 0.010270928032696247 2023-01-22 08:12:42.331975: step: 
364/77, loss: 0.00018565382924862206 2023-01-22 08:12:43.594225: step: 368/77, loss: 0.0017443906981498003 2023-01-22 08:12:44.853102: step: 372/77, loss: 0.04768596962094307 2023-01-22 08:12:46.151249: step: 376/77, loss: 2.2521740902448073e-05 2023-01-22 08:12:47.463638: step: 380/77, loss: 0.03269355744123459 2023-01-22 08:12:48.733241: step: 384/77, loss: 0.041919857263565063 2023-01-22 08:12:50.025859: step: 388/77, loss: 0.032667119055986404 ================================================== Loss: 0.013 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Chinese: {'template': {'p': 0.9452054794520548, 'r': 0.5267175572519084, 'f1': 0.676470588235294}, 'slot': {'p': 0.4444444444444444, 'r': 0.014453477868112014, 'f1': 0.027996500437445317}, 'combined': 0.0189388091194483, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Korean: {'template': {'p': 0.9452054794520548, 'r': 0.5267175572519084, 'f1': 0.676470588235294}, 'slot': {'p': 0.45714285714285713, 'r': 0.014453477868112014, 'f1': 0.028021015761821366}, 'combined': 0.018955393015349747, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5343511450381679, 'f1': 0.6829268292682927}, 'slot': {'p': 0.45714285714285713, 'r': 0.014453477868112014, 'f1': 0.028021015761821366}, 'combined': 0.01913630344709752, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 
0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 17 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:14:30.888569: step: 4/77, loss: 0.054453346878290176 2023-01-22 08:14:32.176701: step: 8/77, loss: 0.002369248541072011 2023-01-22 08:14:33.472994: step: 12/77, loss: 0.00011414064647397026 2023-01-22 08:14:34.778005: step: 16/77, loss: 0.00033900514245033264 2023-01-22 08:14:36.061334: step: 20/77, loss: 6.442344601964578e-05 2023-01-22 08:14:37.313732: step: 24/77, loss: 0.001451255870051682 2023-01-22 08:14:38.598291: step: 28/77, loss: 5.508850790647557e-06 2023-01-22 08:14:39.866450: step: 32/77, loss: 1.4209333130565938e-05 2023-01-22 08:14:41.156659: step: 36/77, loss: 0.0001875080051831901 2023-01-22 08:14:42.441882: step: 40/77, loss: 0.00018672860460355878 2023-01-22 08:14:43.739932: step: 44/77, loss: 3.9590067899553105e-06 2023-01-22 08:14:45.035801: step: 48/77, loss: 2.7718328055925667e-05 2023-01-22 08:14:46.308189: step: 52/77, loss: 8.046550874496461e-07 2023-01-22 08:14:47.567427: step: 56/77, loss: 0.0021199325565248728 2023-01-22 08:14:48.855820: step: 60/77, loss: 0.008005029521882534 2023-01-22 08:14:50.127471: step: 64/77, loss: 0.0005227422225289047 2023-01-22 08:14:51.478230: step: 68/77, loss: 0.0992288812994957 2023-01-22 08:14:52.773320: step: 72/77, loss: 0.0037284833379089832 2023-01-22 08:14:54.053909: step: 76/77, loss: 0.0004825929645448923 2023-01-22 08:14:55.328115: step: 80/77, loss: 7.151671525207348e-06 2023-01-22 08:14:56.634490: step: 84/77, loss: 0.06846614927053452 2023-01-22 08:14:57.915418: step: 88/77, loss: 0.00017095819930545986 2023-01-22 08:14:59.194553: step: 92/77, loss: 0.009575593285262585 2023-01-22 08:15:00.469444: step: 96/77, loss: 0.0002829671429935843 2023-01-22 08:15:01.740122: step: 100/77, loss: 5.086255259811878e-05 2023-01-22 08:15:03.031638: step: 104/77, loss: 0.0008916123188100755 2023-01-22 08:15:04.308655: step: 108/77, loss: 0.06513987481594086 2023-01-22 08:15:05.588751: step: 112/77, loss: 2.262550151499454e-05 2023-01-22 08:15:06.886687: step: 116/77, loss: 0.0018423409201204777 2023-01-22 08:15:08.190985: step: 120/77, loss: 0.004341209307312965 2023-01-22 08:15:09.503574: step: 124/77, loss: 3.488311995170079e-05 2023-01-22 08:15:10.827648: step: 128/77, loss: 0.008629067800939083 2023-01-22 08:15:12.156208: step: 132/77, loss: 0.00028946733800694346 2023-01-22 08:15:13.423506: step: 136/77, loss: 0.05375664681196213 2023-01-22 08:15:14.738938: step: 140/77, loss: 0.028950396925210953 2023-01-22 08:15:16.028739: step: 
144/77, loss: 1.7149346604128368e-05 2023-01-22 08:15:17.332279: step: 148/77, loss: 0.009458029642701149 2023-01-22 08:15:18.643418: step: 152/77, loss: 0.004180568736046553 2023-01-22 08:15:19.936769: step: 156/77, loss: 0.006696035154163837 2023-01-22 08:15:21.231347: step: 160/77, loss: 0.001574444817379117 2023-01-22 08:15:22.485396: step: 164/77, loss: 0.00542917987331748 2023-01-22 08:15:23.817111: step: 168/77, loss: 0.0009257158963009715 2023-01-22 08:15:25.068863: step: 172/77, loss: 0.004280414432287216 2023-01-22 08:15:26.368473: step: 176/77, loss: 0.0005574374226853251 2023-01-22 08:15:27.685301: step: 180/77, loss: 5.240487735136412e-05 2023-01-22 08:15:28.977675: step: 184/77, loss: 0.005636123474687338 2023-01-22 08:15:30.249181: step: 188/77, loss: 0.00011814519530162215 2023-01-22 08:15:31.525526: step: 192/77, loss: 0.03577865660190582 2023-01-22 08:15:32.788795: step: 196/77, loss: 0.013659999705851078 2023-01-22 08:15:34.115063: step: 200/77, loss: 0.00022276055824477226 2023-01-22 08:15:35.395483: step: 204/77, loss: 0.06914380192756653 2023-01-22 08:15:36.656778: step: 208/77, loss: 3.636732435552403e-05 2023-01-22 08:15:37.934832: step: 212/77, loss: 0.00028344907332211733 2023-01-22 08:15:39.243584: step: 216/77, loss: 0.0017052993644028902 2023-01-22 08:15:40.516318: step: 220/77, loss: 0.0013186639407649636 2023-01-22 08:15:41.812882: step: 224/77, loss: 0.0039030585903674364 2023-01-22 08:15:43.111634: step: 228/77, loss: 0.025749292224645615 2023-01-22 08:15:44.400660: step: 232/77, loss: 0.0008861465030349791 2023-01-22 08:15:45.661706: step: 236/77, loss: 5.695230356650427e-05 2023-01-22 08:15:46.961476: step: 240/77, loss: 0.0024523527827113867 2023-01-22 08:15:48.230172: step: 244/77, loss: 1.264049751625862e-05 2023-01-22 08:15:49.502425: step: 248/77, loss: 0.01322510652244091 2023-01-22 08:15:50.852788: step: 252/77, loss: 0.0026189072523266077 2023-01-22 08:15:52.132151: step: 256/77, loss: 0.006667570676654577 2023-01-22 08:15:53.470109: step: 260/77, loss: 0.0010307086631655693 2023-01-22 08:15:54.750445: step: 264/77, loss: 0.022734124213457108 2023-01-22 08:15:56.050410: step: 268/77, loss: 0.001391223049722612 2023-01-22 08:15:57.361903: step: 272/77, loss: 0.010047761723399162 2023-01-22 08:15:58.627308: step: 276/77, loss: 0.0538487546145916 2023-01-22 08:15:59.887881: step: 280/77, loss: 0.03308132290840149 2023-01-22 08:16:01.206907: step: 284/77, loss: 0.004668292123824358 2023-01-22 08:16:02.502150: step: 288/77, loss: 0.012309453450143337 2023-01-22 08:16:03.808863: step: 292/77, loss: 0.000616499746683985 2023-01-22 08:16:05.145800: step: 296/77, loss: 0.0017074373317882419 2023-01-22 08:16:06.425340: step: 300/77, loss: 0.00016156738274730742 2023-01-22 08:16:07.688655: step: 304/77, loss: 0.001918491441756487 2023-01-22 08:16:08.945990: step: 308/77, loss: 0.000854568206705153 2023-01-22 08:16:10.225318: step: 312/77, loss: 0.00010927413677563891 2023-01-22 08:16:11.543408: step: 316/77, loss: 0.00046209630090743303 2023-01-22 08:16:12.874185: step: 320/77, loss: 0.00025831477250903845 2023-01-22 08:16:14.164352: step: 324/77, loss: 0.0002843450929503888 2023-01-22 08:16:15.454236: step: 328/77, loss: 0.0011308686807751656 2023-01-22 08:16:16.768453: step: 332/77, loss: 0.028110980987548828 2023-01-22 08:16:18.106262: step: 336/77, loss: 6.039168874849565e-05 2023-01-22 08:16:19.412544: step: 340/77, loss: 0.04042520746588707 2023-01-22 08:16:20.797426: step: 344/77, loss: 0.0008626186172477901 2023-01-22 08:16:22.084305: step: 348/77, 
loss: 0.007865481078624725 2023-01-22 08:16:23.424850: step: 352/77, loss: 6.5393810473324265e-06 2023-01-22 08:16:24.705777: step: 356/77, loss: 0.005444225389510393 2023-01-22 08:16:25.955191: step: 360/77, loss: 0.12471656501293182 2023-01-22 08:16:27.245420: step: 364/77, loss: 0.0909993126988411 2023-01-22 08:16:28.566445: step: 368/77, loss: 0.08299486339092255 2023-01-22 08:16:29.841455: step: 372/77, loss: 0.0007615904905833304 2023-01-22 08:16:31.111079: step: 376/77, loss: 0.004367351066321135 2023-01-22 08:16:32.376155: step: 380/77, loss: 0.0003982612688560039 2023-01-22 08:16:33.678531: step: 384/77, loss: 0.00010780211596284062 2023-01-22 08:16:35.045953: step: 388/77, loss: 0.00010356045822845772 ================================================== Loss: 0.012 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Chinese: {'template': {'p': 0.935064935064935, 'r': 0.549618320610687, 'f1': 0.6923076923076923}, 'slot': {'p': 0.48484848484848486, 'r': 0.014453477868112014, 'f1': 0.02807017543859649}, 'combined': 0.0194331983805668, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Korean: {'template': {'p': 0.922077922077922, 'r': 0.5419847328244275, 'f1': 0.6826923076923077}, 'slot': {'p': 0.4857142857142857, 'r': 0.015356820234869015, 'f1': 0.0297723292469352}, 'combined': 0.02032534015896538, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 17} Test Russian: {'template': {'p': 0.9358974358974359, 'r': 0.5572519083969466, 'f1': 0.6985645933014354}, 'slot': {'p': 0.5, 'r': 0.015356820234869015, 'f1': 0.029798422436459242}, 'combined': 0.020816122850349516, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 
'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 18 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:18:17.048109: step: 4/77, loss: 0.0013607381843030453 2023-01-22 08:18:18.330152: step: 8/77, loss: 0.02796463668346405 2023-01-22 08:18:19.592413: step: 12/77, loss: 0.03240637853741646 2023-01-22 08:18:20.883222: step: 16/77, loss: 0.0004766408819705248 2023-01-22 08:18:22.088510: step: 20/77, loss: 0.007245106156915426 2023-01-22 08:18:23.401633: step: 24/77, loss: 0.005309689790010452 2023-01-22 08:18:24.656137: step: 28/77, loss: 0.022519662976264954 2023-01-22 08:18:25.967420: step: 32/77, loss: 0.012556134723126888 2023-01-22 08:18:27.271280: step: 36/77, loss: 6.95910130161792e-05 2023-01-22 08:18:28.583011: step: 40/77, loss: 0.0017030031885951757 2023-01-22 08:18:29.884918: step: 44/77, loss: 0.0013226622249931097 2023-01-22 08:18:31.137603: step: 48/77, loss: 0.0002576282131485641 2023-01-22 08:18:32.432526: step: 52/77, loss: 0.10945596545934677 2023-01-22 08:18:33.780703: step: 56/77, loss: 0.0019031423144042492 2023-01-22 08:18:35.080952: step: 60/77, loss: 7.376941357506439e-05 2023-01-22 08:18:36.387440: step: 64/77, loss: 8.989451998786535e-06 2023-01-22 08:18:37.686604: step: 68/77, loss: 0.00048225466161966324 2023-01-22 08:18:38.976054: step: 72/77, loss: 0.009790761396288872 2023-01-22 08:18:40.231429: step: 76/77, loss: 0.00016322605370078236 2023-01-22 08:18:41.525547: step: 80/77, loss: 3.763324275496416e-05 2023-01-22 08:18:42.842716: step: 84/77, loss: 0.000363578787073493 2023-01-22 08:18:44.148759: step: 88/77, loss: 0.024104459211230278 2023-01-22 08:18:45.465293: step: 92/77, loss: 0.001635014428757131 2023-01-22 08:18:46.752406: step: 96/77, loss: 0.008522514253854752 2023-01-22 08:18:48.052832: step: 100/77, loss: 0.0014053500490263104 2023-01-22 08:18:49.340167: step: 104/77, loss: 3.954611383960582e-05 2023-01-22 08:18:50.703555: step: 108/77, loss: 0.004524328745901585 2023-01-22 08:18:52.008802: step: 112/77, loss: 0.004729835316538811 2023-01-22 08:18:53.283249: step: 116/77, loss: 0.00045003817649558187 2023-01-22 08:18:54.581976: step: 120/77, loss: 7.829015521565452e-05 2023-01-22 08:18:55.873475: step: 124/77, loss: 9.200895146932453e-05 2023-01-22 08:18:57.167310: step: 128/77, loss: 0.0005236926954239607 
2023-01-22 08:18:58.446563: step: 132/77, loss: 0.0073389639146625996 2023-01-22 08:18:59.727477: step: 136/77, loss: 0.008800855837762356 2023-01-22 08:19:01.032281: step: 140/77, loss: 0.006390353199094534 2023-01-22 08:19:02.327715: step: 144/77, loss: 0.0022420105524361134 2023-01-22 08:19:03.555800: step: 148/77, loss: 0.003213587449863553 2023-01-22 08:19:04.822721: step: 152/77, loss: 4.092996823601425e-05 2023-01-22 08:19:06.078207: step: 156/77, loss: 0.016200561076402664 2023-01-22 08:19:07.356210: step: 160/77, loss: 0.0342121496796608 2023-01-22 08:19:08.654718: step: 164/77, loss: 0.0004409652901813388 2023-01-22 08:19:09.921631: step: 168/77, loss: 0.002289507072418928 2023-01-22 08:19:11.221750: step: 172/77, loss: 0.0008406225824728608 2023-01-22 08:19:12.543594: step: 176/77, loss: 0.0002802301896736026 2023-01-22 08:19:13.818949: step: 180/77, loss: 0.01646391674876213 2023-01-22 08:19:15.069861: step: 184/77, loss: 0.03260109946131706 2023-01-22 08:19:16.400466: step: 188/77, loss: 0.0011455873027443886 2023-01-22 08:19:17.713255: step: 192/77, loss: 7.002104393905029e-05 2023-01-22 08:19:18.951403: step: 196/77, loss: 0.0010831948602572083 2023-01-22 08:19:20.250900: step: 200/77, loss: 0.00020702120673377067 2023-01-22 08:19:21.515907: step: 204/77, loss: 0.0004199196700938046 2023-01-22 08:19:22.820416: step: 208/77, loss: 0.0051574683748185635 2023-01-22 08:19:24.054136: step: 212/77, loss: 9.940290328813717e-05 2023-01-22 08:19:25.348798: step: 216/77, loss: 0.00029683380853384733 2023-01-22 08:19:26.640038: step: 220/77, loss: 1.3445323929772712e-05 2023-01-22 08:19:27.974264: step: 224/77, loss: 0.0007331773522309959 2023-01-22 08:19:29.241547: step: 228/77, loss: 5.822964340040926e-06 2023-01-22 08:19:30.544266: step: 232/77, loss: 2.0315183064667508e-05 2023-01-22 08:19:31.857198: step: 236/77, loss: 0.00042941138963215053 2023-01-22 08:19:33.138981: step: 240/77, loss: 0.004983678925782442 2023-01-22 08:19:34.443556: step: 244/77, loss: 0.046360064297914505 2023-01-22 08:19:35.746385: step: 248/77, loss: 0.00013092627341393381 2023-01-22 08:19:37.069351: step: 252/77, loss: 3.65358755516354e-06 2023-01-22 08:19:38.371992: step: 256/77, loss: 0.004260228481143713 2023-01-22 08:19:39.638688: step: 260/77, loss: 0.0007137374486774206 2023-01-22 08:19:40.962240: step: 264/77, loss: 0.009653638117015362 2023-01-22 08:19:42.265531: step: 268/77, loss: 0.00011329995322739705 2023-01-22 08:19:43.606266: step: 272/77, loss: 2.1175910660531372e-05 2023-01-22 08:19:44.944399: step: 276/77, loss: 0.02434980869293213 2023-01-22 08:19:46.249484: step: 280/77, loss: 0.019267985597252846 2023-01-22 08:19:47.538923: step: 284/77, loss: 1.3849632523488253e-05 2023-01-22 08:19:48.818726: step: 288/77, loss: 0.0006705262931063771 2023-01-22 08:19:50.072219: step: 292/77, loss: 0.0005639860755763948 2023-01-22 08:19:51.369111: step: 296/77, loss: 0.004534238949418068 2023-01-22 08:19:52.632024: step: 300/77, loss: 0.01240801066160202 2023-01-22 08:19:53.914671: step: 304/77, loss: 0.0001623555872356519 2023-01-22 08:19:55.210303: step: 308/77, loss: 0.01382834929972887 2023-01-22 08:19:56.439587: step: 312/77, loss: 0.0010725526371970773 2023-01-22 08:19:57.707483: step: 316/77, loss: 0.09580781310796738 2023-01-22 08:19:59.019494: step: 320/77, loss: 0.0006804431322962046 2023-01-22 08:20:00.336249: step: 324/77, loss: 0.0001848753308877349 2023-01-22 08:20:01.655558: step: 328/77, loss: 0.000658830045722425 2023-01-22 08:20:03.004221: step: 332/77, loss: 0.015460480935871601 
2023-01-22 08:20:04.283882: step: 336/77, loss: 6.475405825767666e-06 2023-01-22 08:20:05.631682: step: 340/77, loss: 0.0004257145628798753 2023-01-22 08:20:06.944067: step: 344/77, loss: 1.8175755030824803e-05 2023-01-22 08:20:08.236731: step: 348/77, loss: 0.002383376006036997 2023-01-22 08:20:09.518819: step: 352/77, loss: 0.010072077624499798 2023-01-22 08:20:10.789132: step: 356/77, loss: 0.0045144869945943356 2023-01-22 08:20:12.069821: step: 360/77, loss: 0.06425094604492188 2023-01-22 08:20:13.350553: step: 364/77, loss: 0.0003105873183812946 2023-01-22 08:20:14.619642: step: 368/77, loss: 0.007175946142524481 2023-01-22 08:20:15.890734: step: 372/77, loss: 0.00176761404145509 2023-01-22 08:20:17.168870: step: 376/77, loss: 0.03691961616277695 2023-01-22 08:20:18.454188: step: 380/77, loss: 0.0033785656560212374 2023-01-22 08:20:19.720951: step: 384/77, loss: 0.028056461364030838 2023-01-22 08:20:21.081804: step: 388/77, loss: 1.9826911739073694e-05 ================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Chinese: {'template': {'p': 0.971830985915493, 'r': 0.5267175572519084, 'f1': 0.6831683168316832}, 'slot': {'p': 0.4666666666666667, 'r': 0.012646793134598013, 'f1': 0.024626209322779244}, 'combined': 0.016823845972987802, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Korean: {'template': {'p': 0.9577464788732394, 'r': 0.5190839694656488, 'f1': 0.6732673267326732}, 'slot': {'p': 0.4666666666666667, 'r': 0.012646793134598013, 'f1': 0.024626209322779244}, 'combined': 0.016580022118306816, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 18} Test Russian: {'template': {'p': 0.9583333333333334, 'r': 0.5267175572519084, 'f1': 0.6798029556650246}, 'slot': {'p': 0.4666666666666667, 'r': 0.012646793134598013, 'f1': 0.024626209322779244}, 'combined': 0.016740969884450913, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 18} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': 
{'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 19 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:22:02.769073: step: 4/77, loss: 1.1375072972441558e-05 2023-01-22 08:22:04.014007: step: 8/77, loss: 0.11558934301137924 2023-01-22 08:22:05.312282: step: 12/77, loss: 0.026156434789299965 2023-01-22 08:22:06.644030: step: 16/77, loss: 6.465530896093696e-05 2023-01-22 08:22:07.952163: step: 20/77, loss: 5.3221036068862304e-05 2023-01-22 08:22:09.227595: step: 24/77, loss: 8.60507643665187e-05 2023-01-22 08:22:10.518226: step: 28/77, loss: 2.8975300665479153e-05 2023-01-22 08:22:11.821135: step: 32/77, loss: 0.039114248007535934 2023-01-22 08:22:13.081944: step: 36/77, loss: 0.00038312008837237954 2023-01-22 08:22:14.377800: step: 40/77, loss: 0.13180230557918549 2023-01-22 08:22:15.664690: step: 44/77, loss: 0.000846332055516541 2023-01-22 08:22:16.961719: step: 48/77, loss: 0.02761784754693508 2023-01-22 08:22:18.257677: step: 52/77, loss: 9.444686293136328e-05 2023-01-22 08:22:19.580307: step: 56/77, loss: 6.0827398556284606e-05 2023-01-22 08:22:20.914929: step: 60/77, loss: 0.001431214390322566 2023-01-22 08:22:22.235244: step: 64/77, loss: 0.001511475071310997 2023-01-22 08:22:23.520319: step: 68/77, loss: 0.021096883341670036 2023-01-22 08:22:24.752117: step: 72/77, loss: 0.08325263112783432 2023-01-22 08:22:26.083680: step: 76/77, loss: 0.000645479594822973 2023-01-22 08:22:27.395759: step: 80/77, loss: 0.0013617220101878047 2023-01-22 08:22:28.664245: step: 84/77, loss: 0.0008968734182417393 2023-01-22 08:22:29.965099: step: 88/77, loss: 3.4381380828563124e-05 2023-01-22 08:22:31.292862: step: 92/77, loss: 4.9660422519082204e-05 2023-01-22 08:22:32.585720: step: 96/77, loss: 3.6714936868520454e-05 2023-01-22 08:22:33.863226: step: 100/77, loss: 0.0014671917306259274 2023-01-22 08:22:35.141130: step: 104/77, loss: 3.5135267353325617e-06 2023-01-22 08:22:36.402947: step: 108/77, loss: 1.618326859897934e-05 2023-01-22 08:22:37.698076: step: 112/77, loss: 9.728290024213493e-06 
2023-01-22 08:22:38.991470: step: 116/77, loss: 0.0003774128563236445 2023-01-22 08:22:40.304423: step: 120/77, loss: 4.9582966312300414e-05 2023-01-22 08:22:41.600897: step: 124/77, loss: 0.014143971726298332 2023-01-22 08:22:42.917745: step: 128/77, loss: 6.825715536251664e-05 2023-01-22 08:22:44.241584: step: 132/77, loss: 0.00046930837561376393 2023-01-22 08:22:45.496153: step: 136/77, loss: 0.03473134711384773 2023-01-22 08:22:46.794371: step: 140/77, loss: 5.870778841199353e-05 2023-01-22 08:22:48.070989: step: 144/77, loss: 0.00018723284301813692 2023-01-22 08:22:49.367188: step: 148/77, loss: 0.0039024415891617537 2023-01-22 08:22:50.651883: step: 152/77, loss: 0.016148289665579796 2023-01-22 08:22:51.910643: step: 156/77, loss: 9.999565008911304e-06 2023-01-22 08:22:53.151300: step: 160/77, loss: 1.606312707735924e-06 2023-01-22 08:22:54.451959: step: 164/77, loss: 0.001128826173953712 2023-01-22 08:22:55.744872: step: 168/77, loss: 0.0005516837118193507 2023-01-22 08:22:57.024199: step: 172/77, loss: 0.0003651257138699293 2023-01-22 08:22:58.330747: step: 176/77, loss: 0.0001059175847331062 2023-01-22 08:22:59.597401: step: 180/77, loss: 0.0007901396602392197 2023-01-22 08:23:00.842799: step: 184/77, loss: 3.967192242271267e-05 2023-01-22 08:23:02.140185: step: 188/77, loss: 0.0005969982594251633 2023-01-22 08:23:03.465098: step: 192/77, loss: 4.7730307414894924e-05 2023-01-22 08:23:04.790665: step: 196/77, loss: 3.742340413737111e-05 2023-01-22 08:23:06.039424: step: 200/77, loss: 0.00013582775136455894 2023-01-22 08:23:07.336181: step: 204/77, loss: 0.0011125564342364669 2023-01-22 08:23:08.616795: step: 208/77, loss: 0.0021483588498085737 2023-01-22 08:23:09.941653: step: 212/77, loss: 0.0002482504933141172 2023-01-22 08:23:11.252898: step: 216/77, loss: 3.252530223107897e-05 2023-01-22 08:23:12.578717: step: 220/77, loss: 0.03862406313419342 2023-01-22 08:23:13.896524: step: 224/77, loss: 0.03803897649049759 2023-01-22 08:23:15.141904: step: 228/77, loss: 0.0027629907708615065 2023-01-22 08:23:16.448292: step: 232/77, loss: 0.022126782685518265 2023-01-22 08:23:17.680739: step: 236/77, loss: 7.108498721208889e-06 2023-01-22 08:23:18.923941: step: 240/77, loss: 1.3698463590117171e-05 2023-01-22 08:23:20.247834: step: 244/77, loss: 0.0001432388526154682 2023-01-22 08:23:21.515865: step: 248/77, loss: 0.03477947786450386 2023-01-22 08:23:22.822597: step: 252/77, loss: 0.037318840622901917 2023-01-22 08:23:24.144833: step: 256/77, loss: 0.00020358621259219944 2023-01-22 08:23:25.434345: step: 260/77, loss: 0.0005873471382074058 2023-01-22 08:23:26.700354: step: 264/77, loss: 0.003738407976925373 2023-01-22 08:23:28.012744: step: 268/77, loss: 0.007683799136430025 2023-01-22 08:23:29.308812: step: 272/77, loss: 0.001356719876639545 2023-01-22 08:23:30.612604: step: 276/77, loss: 0.00033869032631628215 2023-01-22 08:23:31.896385: step: 280/77, loss: 0.00492723798379302 2023-01-22 08:23:33.209095: step: 284/77, loss: 0.0012603984214365482 2023-01-22 08:23:34.486726: step: 288/77, loss: 0.0006752713234163821 2023-01-22 08:23:35.811901: step: 292/77, loss: 3.044335426238831e-05 2023-01-22 08:23:37.102315: step: 296/77, loss: 4.2869993194472045e-05 2023-01-22 08:23:38.441473: step: 300/77, loss: 0.01467337179929018 2023-01-22 08:23:39.724998: step: 304/77, loss: 0.0013566524721682072 2023-01-22 08:23:41.022009: step: 308/77, loss: 0.00900754053145647 2023-01-22 08:23:42.330498: step: 312/77, loss: 0.00014935439685359597 2023-01-22 08:23:43.675484: step: 316/77, loss: 
0.00011078764509875327 2023-01-22 08:23:44.951582: step: 320/77, loss: 0.0012865741737186909 2023-01-22 08:23:46.304562: step: 324/77, loss: 9.888896602205932e-05 2023-01-22 08:23:47.610761: step: 328/77, loss: 0.0003299658128526062 2023-01-22 08:23:48.893886: step: 332/77, loss: 0.017302073538303375 2023-01-22 08:23:50.234178: step: 336/77, loss: 0.06188048794865608 2023-01-22 08:23:51.567993: step: 340/77, loss: 3.5314424167154357e-06 2023-01-22 08:23:52.864877: step: 344/77, loss: 0.007390220183879137 2023-01-22 08:23:54.115868: step: 348/77, loss: 0.0023874423932284117 2023-01-22 08:23:55.404403: step: 352/77, loss: 0.004484446253627539 2023-01-22 08:23:56.683411: step: 356/77, loss: 0.00024538126308470964 2023-01-22 08:23:58.018253: step: 360/77, loss: 0.003874297020956874 2023-01-22 08:23:59.303830: step: 364/77, loss: 0.012899035587906837 2023-01-22 08:24:00.609496: step: 368/77, loss: 4.1917381167877465e-05 2023-01-22 08:24:01.896709: step: 372/77, loss: 0.0030052876099944115 2023-01-22 08:24:03.216951: step: 376/77, loss: 0.0003932855906896293 2023-01-22 08:24:04.552102: step: 380/77, loss: 0.0002435240603517741 2023-01-22 08:24:05.843001: step: 384/77, loss: 0.0325603261590004 2023-01-22 08:24:07.119303: step: 388/77, loss: 0.03924199938774109 ================================================== Loss: 0.010 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5343511450381679, 'f1': 0.6829268292682927}, 'slot': {'p': 0.4186046511627907, 'r': 0.016260162601626018, 'f1': 0.03130434782608696}, 'combined': 0.02137857900318134, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5343511450381679, 'f1': 0.6829268292682927}, 'slot': {'p': 0.4146341463414634, 'r': 0.015356820234869015, 'f1': 0.029616724738675958}, 'combined': 0.02022605591909578, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 19} Test Russian: {'template': {'p': 0.96, 'r': 0.549618320610687, 'f1': 0.6990291262135923}, 'slot': {'p': 0.43902439024390244, 'r': 0.016260162601626018, 'f1': 0.03135888501742161}, 'combined': 0.021920773992760736, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 19} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 
0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 20 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:25:48.404560: step: 4/77, loss: 0.00025785062462091446 2023-01-22 08:25:49.644915: step: 8/77, loss: 0.0807093009352684 2023-01-22 08:25:50.964571: step: 12/77, loss: 9.079035953618586e-05 2023-01-22 08:25:52.234079: step: 16/77, loss: 0.003588865976780653 2023-01-22 08:25:53.571106: step: 20/77, loss: 7.154016202548519e-05 2023-01-22 08:25:54.853533: step: 24/77, loss: 0.00013810431119054556 2023-01-22 08:25:56.136407: step: 28/77, loss: 0.001978781074285507 2023-01-22 08:25:57.447070: step: 32/77, loss: 3.17572194035165e-05 2023-01-22 08:25:58.754877: step: 36/77, loss: 0.000227707700105384 2023-01-22 08:26:00.041618: step: 40/77, loss: 0.0004038464685436338 2023-01-22 08:26:01.309334: step: 44/77, loss: 1.2348644304438494e-05 2023-01-22 08:26:02.637623: step: 48/77, loss: 0.00803268514573574 2023-01-22 08:26:03.906614: step: 52/77, loss: 0.013548798859119415 2023-01-22 08:26:05.178420: step: 56/77, loss: 0.0005574793322011828 2023-01-22 08:26:06.464513: step: 60/77, loss: 0.000170299390447326 2023-01-22 08:26:07.729335: step: 64/77, loss: 0.005198408383876085 2023-01-22 08:26:09.060950: step: 68/77, loss: 0.009746159426867962 2023-01-22 08:26:10.303763: step: 72/77, loss: 0.0016452963463962078 2023-01-22 08:26:11.575282: step: 76/77, loss: 0.0001234847295563668 2023-01-22 08:26:12.892138: step: 80/77, loss: 0.0015508176293224096 2023-01-22 08:26:14.198100: step: 84/77, loss: 2.2406420612242073e-05 2023-01-22 08:26:15.498438: step: 88/77, loss: 0.004509768448770046 2023-01-22 08:26:16.770844: step: 92/77, loss: 0.00036657514283433557 2023-01-22 08:26:18.044167: step: 96/77, loss: 7.71563354646787e-05 
2023-01-22 08:26:19.306170: step: 100/77, loss: 0.005974154453724623 2023-01-22 08:26:20.593709: step: 104/77, loss: 8.223913027904928e-06 2023-01-22 08:26:21.832938: step: 108/77, loss: 0.0004409156972542405 2023-01-22 08:26:23.102473: step: 112/77, loss: 0.000953411974478513 2023-01-22 08:26:24.383807: step: 116/77, loss: 0.0015970554668456316 2023-01-22 08:26:25.676627: step: 120/77, loss: 0.003411682788282633 2023-01-22 08:26:26.972313: step: 124/77, loss: 0.053259797394275665 2023-01-22 08:26:28.249305: step: 128/77, loss: 0.00028643987025134265 2023-01-22 08:26:29.539183: step: 132/77, loss: 0.015228739939630032 2023-01-22 08:26:30.831939: step: 136/77, loss: 0.0001034773958963342 2023-01-22 08:26:32.139203: step: 140/77, loss: 0.0046743834391236305 2023-01-22 08:26:33.466888: step: 144/77, loss: 0.029333055019378662 2023-01-22 08:26:34.759824: step: 148/77, loss: 1.1347108738846146e-05 2023-01-22 08:26:36.102865: step: 152/77, loss: 0.0005708672106266022 2023-01-22 08:26:37.420213: step: 156/77, loss: 5.751201115344884e-06 2023-01-22 08:26:38.697451: step: 160/77, loss: 4.07801526307594e-05 2023-01-22 08:26:39.974198: step: 164/77, loss: 0.0014126194873824716 2023-01-22 08:26:41.261729: step: 168/77, loss: 0.00010916890460066497 2023-01-22 08:26:42.545691: step: 172/77, loss: 0.09491101652383804 2023-01-22 08:26:43.890410: step: 176/77, loss: 6.833251245552674e-05 2023-01-22 08:26:45.241607: step: 180/77, loss: 0.041619233787059784 2023-01-22 08:26:46.570246: step: 184/77, loss: 1.1870360140164848e-05 2023-01-22 08:26:47.870990: step: 188/77, loss: 0.0008768976549617946 2023-01-22 08:26:49.178022: step: 192/77, loss: 0.0011280208127573133 2023-01-22 08:26:50.504604: step: 196/77, loss: 0.0035699442960321903 2023-01-22 08:26:51.734229: step: 200/77, loss: 0.001058831694535911 2023-01-22 08:26:52.999658: step: 204/77, loss: 0.01659216731786728 2023-01-22 08:26:54.264180: step: 208/77, loss: 0.0856424868106842 2023-01-22 08:26:55.541645: step: 212/77, loss: 0.0002852054312825203 2023-01-22 08:26:56.817383: step: 216/77, loss: 3.439107331359992e-06 2023-01-22 08:26:58.096486: step: 220/77, loss: 0.000124076206702739 2023-01-22 08:26:59.359937: step: 224/77, loss: 0.050598494708538055 2023-01-22 08:27:00.669036: step: 228/77, loss: 0.000414682348491624 2023-01-22 08:27:01.959039: step: 232/77, loss: 0.00021670106798410416 2023-01-22 08:27:03.225736: step: 236/77, loss: 1.5346533473348245e-05 2023-01-22 08:27:04.491039: step: 240/77, loss: 0.002009189687669277 2023-01-22 08:27:05.790201: step: 244/77, loss: 0.00034373922972008586 2023-01-22 08:27:07.102193: step: 248/77, loss: 5.6421900808345526e-05 2023-01-22 08:27:08.384982: step: 252/77, loss: 4.0775397792458534e-05 2023-01-22 08:27:09.659287: step: 256/77, loss: 0.00833223108202219 2023-01-22 08:27:10.932903: step: 260/77, loss: 0.08339645713567734 2023-01-22 08:27:12.266965: step: 264/77, loss: 0.0011742322240024805 2023-01-22 08:27:13.534140: step: 268/77, loss: 7.463623478543013e-05 2023-01-22 08:27:14.811404: step: 272/77, loss: 0.05141519010066986 2023-01-22 08:27:16.058425: step: 276/77, loss: 3.3183584946527844e-06 2023-01-22 08:27:17.337844: step: 280/77, loss: 0.026045873761177063 2023-01-22 08:27:18.682196: step: 284/77, loss: 0.0007297683041542768 2023-01-22 08:27:19.996168: step: 288/77, loss: 1.0163198567170184e-05 2023-01-22 08:27:21.299062: step: 292/77, loss: 0.00027464088634587824 2023-01-22 08:27:22.608827: step: 296/77, loss: 0.007331944536417723 2023-01-22 08:27:23.938082: step: 300/77, loss: 0.00548921525478363 
2023-01-22 08:27:25.222466: step: 304/77, loss: 0.00010299640416633338 2023-01-22 08:27:26.551686: step: 308/77, loss: 7.229519269458251e-06 2023-01-22 08:27:27.835044: step: 312/77, loss: 0.0007639031973667443 2023-01-22 08:27:29.141920: step: 316/77, loss: 0.00017782395298127085 2023-01-22 08:27:30.452959: step: 320/77, loss: 0.06606744229793549 2023-01-22 08:27:31.737421: step: 324/77, loss: 0.005998269654810429 2023-01-22 08:27:33.042046: step: 328/77, loss: 1.3011542250751518e-05 2023-01-22 08:27:34.326971: step: 332/77, loss: 0.0018053125822916627 2023-01-22 08:27:35.586357: step: 336/77, loss: 0.015008185058832169 2023-01-22 08:27:36.911054: step: 340/77, loss: 0.0002600968873593956 2023-01-22 08:27:38.180633: step: 344/77, loss: 0.012032567523419857 2023-01-22 08:27:39.455814: step: 348/77, loss: 0.009152377024292946 2023-01-22 08:27:40.735838: step: 352/77, loss: 0.0005421206587925553 2023-01-22 08:27:42.037743: step: 356/77, loss: 0.008107601664960384 2023-01-22 08:27:43.302005: step: 360/77, loss: 0.030353447422385216 2023-01-22 08:27:44.604781: step: 364/77, loss: 0.0006560410256497562 2023-01-22 08:27:45.826730: step: 368/77, loss: 5.143606358615216e-06 2023-01-22 08:27:47.148097: step: 372/77, loss: 2.687372762011364e-05 2023-01-22 08:27:48.448739: step: 376/77, loss: 0.002569367177784443 2023-01-22 08:27:49.758527: step: 380/77, loss: 4.8763849918032065e-05 2023-01-22 08:27:51.095139: step: 384/77, loss: 0.005358351860195398 2023-01-22 08:27:52.428786: step: 388/77, loss: 7.558208744740114e-05 ================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Chinese: {'template': {'p': 0.9696969696969697, 'r': 0.48854961832061067, 'f1': 0.6497461928934011}, 'slot': {'p': 0.42857142857142855, 'r': 0.013550135501355014, 'f1': 0.02626970227670753}, 'combined': 0.01706863904273383, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Korean: {'template': {'p': 0.9692307692307692, 'r': 0.48091603053435117, 'f1': 0.6428571428571428}, 'slot': {'p': 0.4, 'r': 0.012646793134598013, 'f1': 0.0245183887915937}, 'combined': 0.01576182136602452, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Russian: {'template': {'p': 0.9696969696969697, 'r': 0.48854961832061067, 'f1': 0.6497461928934011}, 'slot': {'p': 0.42857142857142855, 'r': 0.013550135501355014, 'f1': 0.02626970227670753}, 'combined': 0.01706863904273383, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 20} ================================================== Current best result: 
-------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 21 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:29:33.756493: step: 4/77, loss: 1.1914366950804833e-05 2023-01-22 08:29:35.020352: step: 8/77, loss: 0.0006677417550235987 2023-01-22 08:29:36.339611: step: 12/77, loss: 0.0010498034534975886 2023-01-22 08:29:37.651852: step: 16/77, loss: 4.68288162664976e-05 2023-01-22 08:29:38.962303: step: 20/77, loss: 0.006344620604068041 2023-01-22 08:29:40.261655: step: 24/77, loss: 0.0009554250864312053 2023-01-22 08:29:41.530818: step: 28/77, loss: 0.00023494433844462037 2023-01-22 08:29:42.839307: step: 32/77, loss: 0.011859744787216187 2023-01-22 08:29:44.152692: step: 36/77, loss: 0.00040839900611899793 2023-01-22 08:29:45.409008: step: 40/77, loss: 0.0011700581526383758 2023-01-22 08:29:46.671282: step: 44/77, loss: 0.00013149564620107412 2023-01-22 08:29:47.956262: step: 48/77, loss: 0.029546651989221573 2023-01-22 08:29:49.240325: step: 52/77, loss: 0.001863011159002781 2023-01-22 08:29:50.508140: step: 56/77, loss: 0.07704512774944305 2023-01-22 08:29:51.801033: step: 60/77, loss: 0.022442875429987907 2023-01-22 08:29:53.127818: step: 64/77, loss: 0.0035350825637578964 2023-01-22 08:29:54.428013: step: 68/77, loss: 0.0333038829267025 2023-01-22 08:29:55.734452: step: 72/77, loss: 0.008941985666751862 2023-01-22 08:29:57.002935: step: 76/77, loss: 1.3948665582574904e-05 2023-01-22 08:29:58.315418: step: 80/77, loss: 0.00012010518548777327 
2023-01-22 08:29:59.575208: step: 84/77, loss: 0.0011105663143098354 2023-01-22 08:30:00.898422: step: 88/77, loss: 0.00029640455613844097 2023-01-22 08:30:02.203922: step: 92/77, loss: 0.004373971838504076 2023-01-22 08:30:03.523455: step: 96/77, loss: 0.0002705455117393285 2023-01-22 08:30:04.818190: step: 100/77, loss: 0.0009135695872828364 2023-01-22 08:30:06.111802: step: 104/77, loss: 0.00013783590111415833 2023-01-22 08:30:07.405352: step: 108/77, loss: 5.1556075050029904e-05 2023-01-22 08:30:08.669426: step: 112/77, loss: 0.005635536275804043 2023-01-22 08:30:09.909944: step: 116/77, loss: 0.026683399453759193 2023-01-22 08:30:11.221270: step: 120/77, loss: 4.783980330103077e-06 2023-01-22 08:30:12.545456: step: 124/77, loss: 6.202296208357438e-05 2023-01-22 08:30:13.836865: step: 128/77, loss: 3.960451977036428e-06 2023-01-22 08:30:15.078198: step: 132/77, loss: 3.1410495466843713e-06 2023-01-22 08:30:16.392866: step: 136/77, loss: 2.6866157440963434e-06 2023-01-22 08:30:17.700520: step: 140/77, loss: 0.027216006070375443 2023-01-22 08:30:18.984783: step: 144/77, loss: 0.001939344103448093 2023-01-22 08:30:20.309692: step: 148/77, loss: 0.00010330761870136485 2023-01-22 08:30:21.577237: step: 152/77, loss: 0.03519413247704506 2023-01-22 08:30:22.844635: step: 156/77, loss: 0.01529156044125557 2023-01-22 08:30:24.122905: step: 160/77, loss: 0.00013548173592425883 2023-01-22 08:30:25.372619: step: 164/77, loss: 0.00017336659948341548 2023-01-22 08:30:26.650247: step: 168/77, loss: 1.773536132532172e-05 2023-01-22 08:30:27.947703: step: 172/77, loss: 0.0007969909347593784 2023-01-22 08:30:29.226425: step: 176/77, loss: 0.000521956360898912 2023-01-22 08:30:30.485496: step: 180/77, loss: 0.0014356840401887894 2023-01-22 08:30:31.795620: step: 184/77, loss: 0.010686034336686134 2023-01-22 08:30:33.080553: step: 188/77, loss: 3.401363574084826e-05 2023-01-22 08:30:34.413513: step: 192/77, loss: 0.0011438775109127164 2023-01-22 08:30:35.682578: step: 196/77, loss: 0.0010469158878549933 2023-01-22 08:30:36.979874: step: 200/77, loss: 6.212064909050241e-05 2023-01-22 08:30:38.339433: step: 204/77, loss: 0.0015256913611665368 2023-01-22 08:30:39.625630: step: 208/77, loss: 0.05329586938023567 2023-01-22 08:30:40.913853: step: 212/77, loss: 2.653541559993755e-05 2023-01-22 08:30:42.239266: step: 216/77, loss: 7.547883797087707e-06 2023-01-22 08:30:43.521626: step: 220/77, loss: 0.0016823242185637355 2023-01-22 08:30:44.807494: step: 224/77, loss: 0.00011696373258018866 2023-01-22 08:30:46.078437: step: 228/77, loss: 4.268263000994921e-05 2023-01-22 08:30:47.373384: step: 232/77, loss: 0.0003383099683560431 2023-01-22 08:30:48.661895: step: 236/77, loss: 0.022379782050848007 2023-01-22 08:30:50.003912: step: 240/77, loss: 0.0016691423952579498 2023-01-22 08:30:51.303429: step: 244/77, loss: 2.1457188267959282e-05 2023-01-22 08:30:52.627674: step: 248/77, loss: 0.001095449784770608 2023-01-22 08:30:54.001810: step: 252/77, loss: 0.00016877110465429723 2023-01-22 08:30:55.291012: step: 256/77, loss: 0.0005651089013554156 2023-01-22 08:30:56.595222: step: 260/77, loss: 0.02748904563486576 2023-01-22 08:30:57.915608: step: 264/77, loss: 0.0003405744209885597 2023-01-22 08:30:59.203513: step: 268/77, loss: 1.212671213579597e-05 2023-01-22 08:31:00.530847: step: 272/77, loss: 0.0006532514235004783 2023-01-22 08:31:01.849057: step: 276/77, loss: 0.008219941519200802 2023-01-22 08:31:03.137689: step: 280/77, loss: 0.00036534247919917107 2023-01-22 08:31:04.438348: step: 284/77, loss: 
0.00047409304534085095 2023-01-22 08:31:05.722741: step: 288/77, loss: 9.568202949594706e-05 2023-01-22 08:31:07.013529: step: 292/77, loss: 1.4007066795329592e-07 2023-01-22 08:31:08.305444: step: 296/77, loss: 0.025079643353819847 2023-01-22 08:31:09.616861: step: 300/77, loss: 7.805313543940429e-06 2023-01-22 08:31:10.931212: step: 304/77, loss: 4.479253038880415e-05 2023-01-22 08:31:12.265648: step: 308/77, loss: 0.001709498930722475 2023-01-22 08:31:13.514374: step: 312/77, loss: 0.00020420948567334563 2023-01-22 08:31:14.799526: step: 316/77, loss: 1.8438247934682295e-05 2023-01-22 08:31:16.166859: step: 320/77, loss: 0.12470944225788116 2023-01-22 08:31:17.449051: step: 324/77, loss: 0.0016903901705518365 2023-01-22 08:31:18.740777: step: 328/77, loss: 0.00316980411298573 2023-01-22 08:31:20.002024: step: 332/77, loss: 0.0025733783841133118 2023-01-22 08:31:21.353016: step: 336/77, loss: 0.00011846191773656756 2023-01-22 08:31:22.658536: step: 340/77, loss: 0.0011499988613650203 2023-01-22 08:31:23.941426: step: 344/77, loss: 1.996714672714006e-06 2023-01-22 08:31:25.248650: step: 348/77, loss: 8.102708670776337e-05 2023-01-22 08:31:26.549611: step: 352/77, loss: 0.00878163706511259 2023-01-22 08:31:27.845254: step: 356/77, loss: 0.0012174914591014385 2023-01-22 08:31:29.128780: step: 360/77, loss: 4.949744834448211e-06 2023-01-22 08:31:30.355131: step: 364/77, loss: 0.0038780434988439083 2023-01-22 08:31:31.650404: step: 368/77, loss: 0.012070821598172188 2023-01-22 08:31:32.940386: step: 372/77, loss: 0.0018070744117721915 2023-01-22 08:31:34.214821: step: 376/77, loss: 7.179051863204222e-06 2023-01-22 08:31:35.505065: step: 380/77, loss: 0.00012617415632121265 2023-01-22 08:31:36.735942: step: 384/77, loss: 0.004800946451723576 2023-01-22 08:31:37.994597: step: 388/77, loss: 0.014380555599927902 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Chinese: {'template': {'p': 0.925, 'r': 0.5648854961832062, 'f1': 0.7014218009478673}, 'slot': {'p': 0.375, 'r': 0.016260162601626018, 'f1': 0.031168831168831172}, 'combined': 0.02186249769188158, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Korean: {'template': {'p': 0.9240506329113924, 'r': 0.5572519083969466, 'f1': 0.6952380952380953}, 'slot': {'p': 0.375, 'r': 0.016260162601626018, 'f1': 0.031168831168831172}, 'combined': 0.02166975881261596, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 21} Test Russian: {'template': {'p': 0.9240506329113924, 'r': 0.5572519083969466, 'f1': 0.6952380952380953}, 'slot': {'p': 0.36, 'r': 0.016260162601626018, 'f1': 0.031114952463267072}, 'combined': 0.021632300283985682, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 21} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} Sample 
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 21} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 22 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:33:19.414887: step: 4/77, loss: 0.0004149131418671459 2023-01-22 08:33:20.712046: step: 8/77, loss: 0.049201954156160355 2023-01-22 08:33:22.031687: step: 12/77, loss: 0.02350142039358616 2023-01-22 08:33:23.304907: step: 16/77, loss: 0.0002929410256911069 2023-01-22 08:33:24.644719: step: 20/77, loss: 0.000536171777639538 2023-01-22 08:33:25.939650: step: 24/77, loss: 0.1399349868297577 2023-01-22 08:33:27.209428: step: 28/77, loss: 0.020580261945724487 2023-01-22 08:33:28.472633: step: 32/77, loss: 0.0659652128815651 2023-01-22 08:33:29.756645: step: 36/77, loss: 0.030944466590881348 2023-01-22 08:33:31.010553: step: 40/77, loss: 4.1995273932116106e-05 2023-01-22 08:33:32.300075: step: 44/77, loss: 0.0005336483009159565 2023-01-22 08:33:33.554381: step: 48/77, loss: 0.00249558687210083 2023-01-22 08:33:34.899459: step: 52/77, loss: 0.011043079197406769 2023-01-22 08:33:36.197211: step: 56/77, loss: 0.002647034591063857 2023-01-22 08:33:37.497787: step: 60/77, loss: 0.07114987820386887 2023-01-22 08:33:38.764932: step: 64/77, loss: 0.0004703971208073199 2023-01-22 08:33:40.015930: 
step: 68/77, loss: 0.11193948984146118 2023-01-22 08:33:41.283389: step: 72/77, loss: 0.006072756368666887 2023-01-22 08:33:42.554843: step: 76/77, loss: 0.004863828886300325 2023-01-22 08:33:43.861053: step: 80/77, loss: 0.0003708004660438746 2023-01-22 08:33:45.157489: step: 84/77, loss: 0.034591663628816605 2023-01-22 08:33:46.439499: step: 88/77, loss: 0.0032617237884551287 2023-01-22 08:33:47.702360: step: 92/77, loss: 0.0001561310637043789 2023-01-22 08:33:48.986220: step: 96/77, loss: 2.1224195734248497e-05 2023-01-22 08:33:50.255334: step: 100/77, loss: 0.0001106334530049935 2023-01-22 08:33:51.591952: step: 104/77, loss: 5.8119076129514724e-05 2023-01-22 08:33:52.870686: step: 108/77, loss: 0.00011468568118289113 2023-01-22 08:33:54.158103: step: 112/77, loss: 1.173296641354682e-05 2023-01-22 08:33:55.443709: step: 116/77, loss: 0.03556504100561142 2023-01-22 08:33:56.717704: step: 120/77, loss: 0.0009185223607346416 2023-01-22 08:33:58.017708: step: 124/77, loss: 0.0006989326211623847 2023-01-22 08:33:59.274119: step: 128/77, loss: 0.0005934851942583919 2023-01-22 08:34:00.603996: step: 132/77, loss: 0.00019201381655875593 2023-01-22 08:34:01.882801: step: 136/77, loss: 0.0001522630627732724 2023-01-22 08:34:03.113568: step: 140/77, loss: 0.00010733659291872755 2023-01-22 08:34:04.362630: step: 144/77, loss: 0.00011833933967864141 2023-01-22 08:34:05.625245: step: 148/77, loss: 0.0002826797135639936 2023-01-22 08:34:06.902032: step: 152/77, loss: 2.389593646512367e-05 2023-01-22 08:34:08.184263: step: 156/77, loss: 0.03188958019018173 2023-01-22 08:34:09.506395: step: 160/77, loss: 7.080078648868948e-05 2023-01-22 08:34:10.774753: step: 164/77, loss: 0.00020094559295102954 2023-01-22 08:34:12.028452: step: 168/77, loss: 4.374063792056404e-05 2023-01-22 08:34:13.305071: step: 172/77, loss: 1.4840891253697919e-06 2023-01-22 08:34:14.550019: step: 176/77, loss: 0.033406343311071396 2023-01-22 08:34:15.818560: step: 180/77, loss: 5.9410504036350176e-05 2023-01-22 08:34:17.067568: step: 184/77, loss: 5.215402509861633e-08 2023-01-22 08:34:18.356478: step: 188/77, loss: 0.00017796778411138803 2023-01-22 08:34:19.680097: step: 192/77, loss: 5.456154212879483e-06 2023-01-22 08:34:21.019033: step: 196/77, loss: 0.0021971790120005608 2023-01-22 08:34:22.290533: step: 200/77, loss: 0.0001755795383360237 2023-01-22 08:34:23.582625: step: 204/77, loss: 0.00436083460226655 2023-01-22 08:34:24.896304: step: 208/77, loss: 7.003449127296335e-07 2023-01-22 08:34:26.136712: step: 212/77, loss: 1.8607093807077035e-05 2023-01-22 08:34:27.469143: step: 216/77, loss: 0.006038544233888388 2023-01-22 08:34:28.774752: step: 220/77, loss: 2.2351736461700966e-08 2023-01-22 08:34:30.110939: step: 224/77, loss: 1.1814416211564094e-05 2023-01-22 08:34:31.407014: step: 228/77, loss: 0.00021583585476037115 2023-01-22 08:34:32.712777: step: 232/77, loss: 0.0002107950858771801 2023-01-22 08:34:33.985975: step: 236/77, loss: 2.4991000827867538e-05 2023-01-22 08:34:35.260714: step: 240/77, loss: 0.00030950052314437926 2023-01-22 08:34:36.559415: step: 244/77, loss: 0.005680656060576439 2023-01-22 08:34:37.881588: step: 248/77, loss: 2.7505677735462086e-06 2023-01-22 08:34:39.107323: step: 252/77, loss: 0.0005136749241501093 2023-01-22 08:34:40.406108: step: 256/77, loss: 4.056877878610976e-05 2023-01-22 08:34:41.695057: step: 260/77, loss: 0.0026249089278280735 2023-01-22 08:34:43.029430: step: 264/77, loss: 0.0010923325316980481 2023-01-22 08:34:44.344614: step: 268/77, loss: 1.0708590707508847e-05 2023-01-22 
08:34:45.626492: step: 272/77, loss: 6.018809017405147e-06 2023-01-22 08:34:46.957167: step: 276/77, loss: 0.04078038036823273 2023-01-22 08:34:48.230914: step: 280/77, loss: 9.682937525212765e-05 2023-01-22 08:34:49.503550: step: 284/77, loss: 0.0004811729013454169 2023-01-22 08:34:50.786516: step: 288/77, loss: 1.8589149476611055e-05 2023-01-22 08:34:52.082794: step: 292/77, loss: 0.020074518397450447 2023-01-22 08:34:53.393273: step: 296/77, loss: 3.901428863173351e-05 2023-01-22 08:34:54.667434: step: 300/77, loss: 0.032694969326257706 2023-01-22 08:34:55.965405: step: 304/77, loss: 0.00020149351621512324 2023-01-22 08:34:57.240642: step: 308/77, loss: 0.03729041665792465 2023-01-22 08:34:58.538812: step: 312/77, loss: 7.061958604026586e-05 2023-01-22 08:34:59.838909: step: 316/77, loss: 0.002287800656631589 2023-01-22 08:35:01.150503: step: 320/77, loss: 0.00011647411156445742 2023-01-22 08:35:02.463352: step: 324/77, loss: 0.005575717426836491 2023-01-22 08:35:03.740705: step: 328/77, loss: 5.3803167247679085e-05 2023-01-22 08:35:05.031634: step: 332/77, loss: 0.032028112560510635 2023-01-22 08:35:06.301146: step: 336/77, loss: 0.025905797258019447 2023-01-22 08:35:07.627066: step: 340/77, loss: 0.0010144426487386227 2023-01-22 08:35:08.923904: step: 344/77, loss: 0.0006542807095684111 2023-01-22 08:35:10.250425: step: 348/77, loss: 3.084996569668874e-05 2023-01-22 08:35:11.556760: step: 352/77, loss: 0.050253357738256454 2023-01-22 08:35:12.899903: step: 356/77, loss: 0.00022352926316671073 2023-01-22 08:35:14.184065: step: 360/77, loss: 0.002475632121786475 2023-01-22 08:35:15.466042: step: 364/77, loss: 0.00017036257486324757 2023-01-22 08:35:16.762637: step: 368/77, loss: 2.4630685402371455e-06 2023-01-22 08:35:18.000003: step: 372/77, loss: 1.9371481130292523e-07 2023-01-22 08:35:19.293786: step: 376/77, loss: 4.898713086731732e-06 2023-01-22 08:35:20.616254: step: 380/77, loss: 2.603577740956098e-05 2023-01-22 08:35:21.913585: step: 384/77, loss: 0.055696628987789154 2023-01-22 08:35:23.202150: step: 388/77, loss: 3.929721424356103e-05 ================================================== Loss: 0.010 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Chinese: {'template': {'p': 0.984375, 'r': 0.48091603053435117, 'f1': 0.6461538461538462}, 'slot': {'p': 0.3793103448275862, 'r': 0.00993676603432701, 'f1': 0.01936619718309859}, 'combined': 0.012513542795232936, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Korean: {'template': {'p': 0.96875, 'r': 0.4732824427480916, 'f1': 0.6358974358974359}, 'slot': {'p': 0.3793103448275862, 'r': 0.00993676603432701, 'f1': 0.01936619718309859}, 'combined': 0.012314915131816538, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Russian: {'template': {'p': 0.9696969696969697, 'r': 0.48854961832061067, 'f1': 0.6497461928934011}, 'slot': {'p': 0.3793103448275862, 'r': 0.00993676603432701, 'f1': 0.01936619718309859}, 'combined': 0.012583112890541218, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 
0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 23 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:37:04.648538: step: 4/77, loss: 0.012566723860800266 2023-01-22 08:37:05.928649: step: 8/77, loss: 8.478707513859263e-07 2023-01-22 08:37:07.234329: step: 12/77, loss: 0.005080666858702898 2023-01-22 08:37:08.567810: step: 16/77, loss: 0.0034914321731776 2023-01-22 08:37:09.858507: step: 20/77, loss: 0.0004792654071934521 2023-01-22 08:37:11.183075: step: 24/77, loss: 0.00010682813444873318 2023-01-22 08:37:12.454905: step: 28/77, loss: 7.045550592010841e-05 2023-01-22 08:37:13.727607: step: 32/77, loss: 8.381021325476468e-05 2023-01-22 08:37:14.984517: step: 36/77, loss: 0.009439518675208092 2023-01-22 08:37:16.258859: step: 40/77, loss: 0.005691776983439922 2023-01-22 08:37:17.563886: step: 44/77, loss: 0.0011711755068972707 2023-01-22 08:37:18.886626: step: 48/77, loss: 0.003150845179334283 2023-01-22 08:37:20.190228: step: 52/77, loss: 0.00031852780375629663 2023-01-22 
08:37:21.434450: step: 56/77, loss: 0.0007815890712663531 2023-01-22 08:37:22.730433: step: 60/77, loss: 0.0007130279554985464 2023-01-22 08:37:24.065388: step: 64/77, loss: 0.0240264181047678 2023-01-22 08:37:25.360066: step: 68/77, loss: 0.00012617622269317508 2023-01-22 08:37:26.622208: step: 72/77, loss: 0.018813449889421463 2023-01-22 08:37:27.908957: step: 76/77, loss: 0.004636474419385195 2023-01-22 08:37:29.187589: step: 80/77, loss: 0.00027595015126280487 2023-01-22 08:37:30.495147: step: 84/77, loss: 6.334046702249907e-06 2023-01-22 08:37:31.783376: step: 88/77, loss: 0.029080556705594063 2023-01-22 08:37:33.057769: step: 92/77, loss: 0.018633205443620682 2023-01-22 08:37:34.331077: step: 96/77, loss: 0.02069343626499176 2023-01-22 08:37:35.651423: step: 100/77, loss: 0.0008100596605800092 2023-01-22 08:37:36.956100: step: 104/77, loss: 7.562098289781716e-06 2023-01-22 08:37:38.182995: step: 108/77, loss: 3.5552866393118165e-06 2023-01-22 08:37:39.433710: step: 112/77, loss: 0.0007956930203363299 2023-01-22 08:37:40.690244: step: 116/77, loss: 1.4057108273846097e-05 2023-01-22 08:37:42.025504: step: 120/77, loss: 0.00010456127347424626 2023-01-22 08:37:43.339473: step: 124/77, loss: 7.063026146170159e-07 2023-01-22 08:37:44.617293: step: 128/77, loss: 0.00023995916126295924 2023-01-22 08:37:45.910103: step: 132/77, loss: 0.02845638059079647 2023-01-22 08:37:47.143056: step: 136/77, loss: 0.030174342915415764 2023-01-22 08:37:48.443546: step: 140/77, loss: 0.0004656286328099668 2023-01-22 08:37:49.816285: step: 144/77, loss: 0.00016981828957796097 2023-01-22 08:37:51.141506: step: 148/77, loss: 2.4861627025529742e-05 2023-01-22 08:37:52.395789: step: 152/77, loss: 4.1571697693143506e-06 2023-01-22 08:37:53.657144: step: 156/77, loss: 0.0012472313828766346 2023-01-22 08:37:54.908039: step: 160/77, loss: 0.0007646206067875028 2023-01-22 08:37:56.168950: step: 164/77, loss: 3.660557194962166e-05 2023-01-22 08:37:57.523465: step: 168/77, loss: 1.2873996411144617e-06 2023-01-22 08:37:58.850007: step: 172/77, loss: 5.140098437550478e-06 2023-01-22 08:38:00.133350: step: 176/77, loss: 5.215400022962058e-08 2023-01-22 08:38:01.371803: step: 180/77, loss: 0.007089770864695311 2023-01-22 08:38:02.620650: step: 184/77, loss: 6.2692542996956035e-06 2023-01-22 08:38:03.913018: step: 188/77, loss: 0.001343978801742196 2023-01-22 08:38:05.202781: step: 192/77, loss: 0.05258629098534584 2023-01-22 08:38:06.504742: step: 196/77, loss: 0.0030400222167372704 2023-01-22 08:38:07.770709: step: 200/77, loss: 0.02726101316511631 2023-01-22 08:38:09.051047: step: 204/77, loss: 0.06454948335886002 2023-01-22 08:38:10.355201: step: 208/77, loss: 5.5959285418794025e-06 2023-01-22 08:38:11.645682: step: 212/77, loss: 0.0009825187735259533 2023-01-22 08:38:12.931181: step: 216/77, loss: 4.3568870751187205e-06 2023-01-22 08:38:14.244752: step: 220/77, loss: 0.0007414943538606167 2023-01-22 08:38:15.486667: step: 224/77, loss: 1.1882290891662706e-05 2023-01-22 08:38:16.775732: step: 228/77, loss: 0.016352159902453423 2023-01-22 08:38:18.095329: step: 232/77, loss: 0.00029403012013062835 2023-01-22 08:38:19.447489: step: 236/77, loss: 0.013957513496279716 2023-01-22 08:38:20.801211: step: 240/77, loss: 0.0008857838693074882 2023-01-22 08:38:22.123430: step: 244/77, loss: 2.8222179025760852e-05 2023-01-22 08:38:23.426117: step: 248/77, loss: 0.00010344553447794169 2023-01-22 08:38:24.752255: step: 252/77, loss: 2.9653216415681527e-07 2023-01-22 08:38:26.021395: step: 256/77, loss: 1.1920915454766146e-07 
2023-01-22 08:38:27.304285: step: 260/77, loss: 0.007125942036509514 2023-01-22 08:38:28.596815: step: 264/77, loss: 5.081273002360831e-07 2023-01-22 08:38:29.913425: step: 268/77, loss: 0.029344888404011726 2023-01-22 08:38:31.180072: step: 272/77, loss: 2.980204101277195e-07 2023-01-22 08:38:32.444749: step: 276/77, loss: 0.001337407506071031 2023-01-22 08:38:33.734957: step: 280/77, loss: 0.0013053520815446973 2023-01-22 08:38:35.027783: step: 284/77, loss: 0.00030600311583839357 2023-01-22 08:38:36.296105: step: 288/77, loss: 0.004457205068320036 2023-01-22 08:38:37.553662: step: 292/77, loss: 5.743621386500308e-06 2023-01-22 08:38:38.871574: step: 296/77, loss: 5.672270162904169e-06 2023-01-22 08:38:40.219889: step: 300/77, loss: 8.01328060333617e-05 2023-01-22 08:38:41.510853: step: 304/77, loss: 0.0063082557171583176 2023-01-22 08:38:42.781143: step: 308/77, loss: 0.0002656308060977608 2023-01-22 08:38:44.048839: step: 312/77, loss: 0.0001685560418991372 2023-01-22 08:38:45.359986: step: 316/77, loss: 5.051433618064038e-07 2023-01-22 08:38:46.698567: step: 320/77, loss: 0.0030908186454325914 2023-01-22 08:38:47.971604: step: 324/77, loss: 1.1965148587478325e-06 2023-01-22 08:38:49.291377: step: 328/77, loss: 0.00015451980289071798 2023-01-22 08:38:50.615300: step: 332/77, loss: 0.000776467437390238 2023-01-22 08:38:51.947410: step: 336/77, loss: 8.403077663388103e-05 2023-01-22 08:38:53.240076: step: 340/77, loss: 0.05382693558931351 2023-01-22 08:38:54.507027: step: 344/77, loss: 0.00024840643163770437 2023-01-22 08:38:55.838700: step: 348/77, loss: 0.0008327533723786473 2023-01-22 08:38:57.170790: step: 352/77, loss: 2.0257557480363175e-05 2023-01-22 08:38:58.473520: step: 356/77, loss: 1.4881431525282096e-05 2023-01-22 08:38:59.779372: step: 360/77, loss: 6.568313438037876e-06 2023-01-22 08:39:01.035628: step: 364/77, loss: 7.910434942459688e-05 2023-01-22 08:39:02.306252: step: 368/77, loss: 0.0018795005744323134 2023-01-22 08:39:03.613305: step: 372/77, loss: 1.0481529898243025e-05 2023-01-22 08:39:04.935537: step: 376/77, loss: 8.545060154574458e-06 2023-01-22 08:39:06.260638: step: 380/77, loss: 0.19529400765895844 2023-01-22 08:39:07.542018: step: 384/77, loss: 1.5300051018130034e-05 2023-01-22 08:39:08.839973: step: 388/77, loss: 2.5652367185102776e-05 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Chinese: {'template': {'p': 0.9714285714285714, 'r': 0.5190839694656488, 'f1': 0.6766169154228854}, 'slot': {'p': 0.48, 'r': 0.01084010840108401, 'f1': 0.02120141342756184}, 'combined': 0.014345234955962235, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Korean: {'template': {'p': 0.9577464788732394, 'r': 0.5190839694656488, 'f1': 0.6732673267326732}, 'slot': {'p': 0.4583333333333333, 'r': 0.00993676603432701, 'f1': 0.01945181255526083}, 'combined': 0.013096269839185508, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Russian: {'template': {'p': 0.9583333333333334, 'r': 
0.5267175572519084, 'f1': 0.6798029556650246}, 'slot': {'p': 0.48, 'r': 0.01084010840108401, 'f1': 0.02120141342756184}, 'combined': 0.014412783512332678, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 24 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:40:49.860902: step: 4/77, loss: 7.27365659258794e-06 2023-01-22 08:40:51.170932: step: 8/77, loss: 1.238247136825521e-06 2023-01-22 08:40:52.434440: step: 12/77, loss: 3.6491248465608805e-05 2023-01-22 08:40:53.739540: step: 16/77, loss: 1.490007525717374e-05 2023-01-22 08:40:54.990902: step: 20/77, loss: 1.8179339633661584e-07 2023-01-22 08:40:56.296638: step: 24/77, loss: 2.4972685423563235e-05 2023-01-22 08:40:57.537887: step: 28/77, loss: 2.0712549542167835e-07 2023-01-22 08:40:58.793990: step: 32/77, loss: 0.013989298604428768 2023-01-22 08:41:00.133741: step: 36/77, loss: 
1.8670249346541823e-06 2023-01-22 08:41:01.405966: step: 40/77, loss: 1.4274588693297119e-06 2023-01-22 08:41:02.748151: step: 44/77, loss: 0.0037133987061679363 2023-01-22 08:41:04.093239: step: 48/77, loss: 0.08478113263845444 2023-01-22 08:41:05.355391: step: 52/77, loss: 0.0002885919820982963 2023-01-22 08:41:06.627700: step: 56/77, loss: 0.039048902690410614 2023-01-22 08:41:07.936740: step: 60/77, loss: 0.00010248593025607988 2023-01-22 08:41:09.245826: step: 64/77, loss: 7.2267857831320725e-06 2023-01-22 08:41:10.524224: step: 68/77, loss: 0.022682351991534233 2023-01-22 08:41:11.798150: step: 72/77, loss: 8.319793414557353e-05 2023-01-22 08:41:13.104738: step: 76/77, loss: 0.11278786510229111 2023-01-22 08:41:14.376711: step: 80/77, loss: 0.00027442857390269637 2023-01-22 08:41:15.603385: step: 84/77, loss: 0.003319286974146962 2023-01-22 08:41:16.914299: step: 88/77, loss: 0.0001222712453454733 2023-01-22 08:41:18.259642: step: 92/77, loss: 0.01310756802558899 2023-01-22 08:41:19.544060: step: 96/77, loss: 7.5497505349630956e-06 2023-01-22 08:41:20.824174: step: 100/77, loss: 0.06587717682123184 2023-01-22 08:41:22.074255: step: 104/77, loss: 0.0018411398632451892 2023-01-22 08:41:23.423477: step: 108/77, loss: 0.0669705793261528 2023-01-22 08:41:24.733204: step: 112/77, loss: 6.61185276840115e-06 2023-01-22 08:41:25.974996: step: 116/77, loss: 0.017078397795557976 2023-01-22 08:41:27.246338: step: 120/77, loss: 0.0007926687248982489 2023-01-22 08:41:28.522052: step: 124/77, loss: 7.0508122007595375e-06 2023-01-22 08:41:29.827495: step: 128/77, loss: 0.016395514830946922 2023-01-22 08:41:31.083275: step: 132/77, loss: 0.00024466862669214606 2023-01-22 08:41:32.355917: step: 136/77, loss: 2.8732954888255335e-05 2023-01-22 08:41:33.647643: step: 140/77, loss: 2.6793524739332497e-05 2023-01-22 08:41:34.959524: step: 144/77, loss: 0.00013426571968011558 2023-01-22 08:41:36.290937: step: 148/77, loss: 0.007598715368658304 2023-01-22 08:41:37.542252: step: 152/77, loss: 3.5730990930460393e-05 2023-01-22 08:41:38.894537: step: 156/77, loss: 0.021153738722205162 2023-01-22 08:41:40.178810: step: 160/77, loss: 3.308024076886795e-07 2023-01-22 08:41:41.431719: step: 164/77, loss: 1.2218577012390597e-06 2023-01-22 08:41:42.765789: step: 168/77, loss: 2.4686836695764214e-05 2023-01-22 08:41:44.076960: step: 172/77, loss: 2.7910971766687e-05 2023-01-22 08:41:45.377742: step: 176/77, loss: 1.3962063576400396e-06 2023-01-22 08:41:46.648473: step: 180/77, loss: 0.00042038323590531945 2023-01-22 08:41:47.954274: step: 184/77, loss: 7.789325536577962e-06 2023-01-22 08:41:49.283046: step: 188/77, loss: 0.0001271903602173552 2023-01-22 08:41:50.624400: step: 192/77, loss: 0.002623402513563633 2023-01-22 08:41:51.871377: step: 196/77, loss: 0.004340842831879854 2023-01-22 08:41:53.206777: step: 200/77, loss: 2.1268249838612974e-05 2023-01-22 08:41:54.519157: step: 204/77, loss: 3.217386256437749e-05 2023-01-22 08:41:55.812939: step: 208/77, loss: 5.2126033551758155e-05 2023-01-22 08:41:57.158034: step: 212/77, loss: 2.491239683877211e-06 2023-01-22 08:41:58.447921: step: 216/77, loss: 0.00140212825499475 2023-01-22 08:41:59.710925: step: 220/77, loss: 0.02959577552974224 2023-01-22 08:42:01.032605: step: 224/77, loss: 2.7585945645114407e-05 2023-01-22 08:42:02.343405: step: 228/77, loss: 0.02623019367456436 2023-01-22 08:42:03.616533: step: 232/77, loss: 0.00029231017106212676 2023-01-22 08:42:04.913976: step: 236/77, loss: 0.00011233628174522892 2023-01-22 08:42:06.192214: step: 240/77, loss: 
2.836989324350725e-06 2023-01-22 08:42:07.433425: step: 244/77, loss: 4.0828840042195225e-07 2023-01-22 08:42:08.681487: step: 248/77, loss: 5.672002316714497e-06 2023-01-22 08:42:09.902500: step: 252/77, loss: 1.2763992344844155e-05 2023-01-22 08:42:11.153777: step: 256/77, loss: 3.2168070447369246e-06 2023-01-22 08:42:12.429581: step: 260/77, loss: 0.004356747958809137 2023-01-22 08:42:13.717187: step: 264/77, loss: 1.299349150940543e-06 2023-01-22 08:42:14.959315: step: 268/77, loss: 0.0002825258707161993 2023-01-22 08:42:16.287609: step: 272/77, loss: 1.18460711746593e-05 2023-01-22 08:42:17.586309: step: 276/77, loss: 5.490722742251819e-06 2023-01-22 08:42:18.913679: step: 280/77, loss: 0.01454936247318983 2023-01-22 08:42:20.209269: step: 284/77, loss: 0.048553213477134705 2023-01-22 08:42:21.484341: step: 288/77, loss: 1.5159343092818744e-05 2023-01-22 08:42:22.757941: step: 292/77, loss: 0.00015713486936874688 2023-01-22 08:42:24.010876: step: 296/77, loss: 0.0001880024210549891 2023-01-22 08:42:25.307848: step: 300/77, loss: 4.002780406153761e-05 2023-01-22 08:42:26.525997: step: 304/77, loss: 0.0004623873101081699 2023-01-22 08:42:27.841658: step: 308/77, loss: 4.9771777412388474e-05 2023-01-22 08:42:29.117686: step: 312/77, loss: 0.0005654781707562506 2023-01-22 08:42:30.400963: step: 316/77, loss: 1.0839068636414595e-05 2023-01-22 08:42:31.675354: step: 320/77, loss: 0.00010301794100087136 2023-01-22 08:42:32.919986: step: 324/77, loss: 1.5710642401245423e-05 2023-01-22 08:42:34.241106: step: 328/77, loss: 0.04304623603820801 2023-01-22 08:42:35.484961: step: 332/77, loss: 3.624851888162084e-05 2023-01-22 08:42:36.772723: step: 336/77, loss: 0.00030866515589877963 2023-01-22 08:42:38.075980: step: 340/77, loss: 6.1949635892233346e-06 2023-01-22 08:42:39.370652: step: 344/77, loss: 4.167402221355587e-05 2023-01-22 08:42:40.650887: step: 348/77, loss: 0.04800989478826523 2023-01-22 08:42:41.922823: step: 352/77, loss: 8.210468536162807e-07 2023-01-22 08:42:43.233820: step: 356/77, loss: 1.8006705431616865e-05 2023-01-22 08:42:44.526853: step: 360/77, loss: 0.0002485654258634895 2023-01-22 08:42:45.843054: step: 364/77, loss: 0.006535383872687817 2023-01-22 08:42:47.136364: step: 368/77, loss: 0.00163769640494138 2023-01-22 08:42:48.434717: step: 372/77, loss: 0.0006401842692866921 2023-01-22 08:42:49.773664: step: 376/77, loss: 1.332126657871413e-06 2023-01-22 08:42:51.110529: step: 380/77, loss: 0.00102709059137851 2023-01-22 08:42:52.399190: step: 384/77, loss: 1.3253316865302622e-05 2023-01-22 08:42:53.748261: step: 388/77, loss: 0.001358823268674314 ================================================== Loss: 0.008 -------------------- Dev Chinese: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 24} Test Chinese: {'template': {'p': 0.9866666666666667, 'r': 0.5648854961832062, 'f1': 0.7184466019417477}, 'slot': {'p': 0.45454545454545453, 'r': 0.013550135501355014, 'f1': 0.02631578947368421}, 'combined': 0.018906489524782834, 'epoch': 24} Dev Korean: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 24} Test Korean: {'template': {'p': 0.9864864864864865, 'r': 0.5572519083969466, 'f1': 0.7121951219512196}, 'slot': {'p': 0.45454545454545453, 'r': 0.013550135501355014, 'f1': 
0.02631578947368421}, 'combined': 0.018741976893453145, 'epoch': 24} Dev Russian: {'template': {'p': 0.9722222222222222, 'r': 0.5833333333333334, 'f1': 0.7291666666666666}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05125951962507322, 'epoch': 24} Test Russian: {'template': {'p': 0.9866666666666667, 'r': 0.5648854961832062, 'f1': 0.7184466019417477}, 'slot': {'p': 0.4411764705882353, 'r': 0.013550135501355014, 'f1': 0.02629272567922875}, 'combined': 0.018889919420028423, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 24} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 25 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:44:35.183474: step: 4/77, loss: 0.01842794567346573 2023-01-22 08:44:36.474892: step: 8/77, loss: 0.0069998279213905334 2023-01-22 08:44:37.809245: step: 12/77, loss: 8.705254913365934e-06 2023-01-22 08:44:39.090403: step: 16/77, 
loss: 1.9711498680408113e-05 2023-01-22 08:44:40.378326: step: 20/77, loss: 0.0018902001902461052 2023-01-22 08:44:41.684227: step: 24/77, loss: 9.889909415505826e-05 2023-01-22 08:44:42.931196: step: 28/77, loss: 0.010225817561149597 2023-01-22 08:44:44.191626: step: 32/77, loss: 0.00024162388581316918 2023-01-22 08:44:45.501680: step: 36/77, loss: 4.7459459892706946e-05 2023-01-22 08:44:46.778036: step: 40/77, loss: 0.00014923862181603909 2023-01-22 08:44:48.022675: step: 44/77, loss: 2.682207522752833e-08 2023-01-22 08:44:49.236712: step: 48/77, loss: 0.0014505910221487284 2023-01-22 08:44:50.542045: step: 52/77, loss: 0.0025084619410336018 2023-01-22 08:44:51.863058: step: 56/77, loss: 0.009419834241271019 2023-01-22 08:44:53.153831: step: 60/77, loss: 3.4378666896373034e-05 2023-01-22 08:44:54.398152: step: 64/77, loss: 7.156289939302951e-05 2023-01-22 08:44:55.692293: step: 68/77, loss: 5.87022805120796e-06 2023-01-22 08:44:57.013346: step: 72/77, loss: 5.6899873015936464e-05 2023-01-22 08:44:58.322831: step: 76/77, loss: 0.009436637163162231 2023-01-22 08:44:59.616058: step: 80/77, loss: 2.0404635506565683e-05 2023-01-22 08:45:00.909139: step: 84/77, loss: 0.003238705452531576 2023-01-22 08:45:02.236180: step: 88/77, loss: 0.00015356639050878584 2023-01-22 08:45:03.541701: step: 92/77, loss: 4.204708602628671e-06 2023-01-22 08:45:04.805329: step: 96/77, loss: 3.562411438906565e-05 2023-01-22 08:45:06.109636: step: 100/77, loss: 4.783595068147406e-05 2023-01-22 08:45:07.397019: step: 104/77, loss: 0.0005482888664118946 2023-01-22 08:45:08.693957: step: 108/77, loss: 6.809772798987979e-07 2023-01-22 08:45:09.988494: step: 112/77, loss: 1.9110122593701817e-05 2023-01-22 08:45:11.278375: step: 116/77, loss: 6.269341611186974e-06 2023-01-22 08:45:12.592049: step: 120/77, loss: 0.00022487477690447122 2023-01-22 08:45:13.935739: step: 124/77, loss: 0.00015616673044860363 2023-01-22 08:45:15.217070: step: 128/77, loss: 1.8163501636081492e-06 2023-01-22 08:45:16.524900: step: 132/77, loss: 2.269335482196766e-06 2023-01-22 08:45:17.814485: step: 136/77, loss: 3.60607316451933e-07 2023-01-22 08:45:19.080724: step: 140/77, loss: 0.0011399483773857355 2023-01-22 08:45:20.409185: step: 144/77, loss: 0.00021700444631278515 2023-01-22 08:45:21.691606: step: 148/77, loss: 4.586233899317449e-06 2023-01-22 08:45:23.024076: step: 152/77, loss: 0.025929274037480354 2023-01-22 08:45:24.358396: step: 156/77, loss: 0.14960448443889618 2023-01-22 08:45:25.666294: step: 160/77, loss: 0.0028908923268318176 2023-01-22 08:45:26.923376: step: 164/77, loss: 1.9430433439993067e-06 2023-01-22 08:45:28.156523: step: 168/77, loss: 0.0003239882062189281 2023-01-22 08:45:29.431094: step: 172/77, loss: 0.023600086569786072 2023-01-22 08:45:30.750723: step: 176/77, loss: 2.9996070225024596e-05 2023-01-22 08:45:32.024518: step: 180/77, loss: 0.0023301143664866686 2023-01-22 08:45:33.311433: step: 184/77, loss: 6.973668291720969e-07 2023-01-22 08:45:34.613628: step: 188/77, loss: 4.741271823149873e-06 2023-01-22 08:45:35.885255: step: 192/77, loss: 2.3011994926491752e-05 2023-01-22 08:45:37.157265: step: 196/77, loss: 2.5616154744056985e-05 2023-01-22 08:45:38.466844: step: 200/77, loss: 0.00017460151866544038 2023-01-22 08:45:39.762430: step: 204/77, loss: 1.892438206141378e-07 2023-01-22 08:45:41.081613: step: 208/77, loss: 1.0356236543884734e-06 2023-01-22 08:45:42.412843: step: 212/77, loss: 0.011446312069892883 2023-01-22 08:45:43.721673: step: 216/77, loss: 0.02305716834962368 2023-01-22 08:45:45.004394: step: 
220/77, loss: 6.117016710049938e-06 2023-01-22 08:45:46.336340: step: 224/77, loss: 0.03381283953785896 2023-01-22 08:45:47.625889: step: 228/77, loss: 0.00010447140084579587 2023-01-22 08:45:48.903692: step: 232/77, loss: 0.002563286339864135 2023-01-22 08:45:50.256002: step: 236/77, loss: 2.2440067368734162e-06 2023-01-22 08:45:51.532559: step: 240/77, loss: 8.55996859172592e-06 2023-01-22 08:45:52.843121: step: 244/77, loss: 0.03666967898607254 2023-01-22 08:45:54.137710: step: 248/77, loss: 0.0009382938733324409 2023-01-22 08:45:55.452053: step: 252/77, loss: 1.1827602065750398e-05 2023-01-22 08:45:56.778625: step: 256/77, loss: 1.7730711988406256e-05 2023-01-22 08:45:58.078978: step: 260/77, loss: 0.015479068271815777 2023-01-22 08:45:59.346164: step: 264/77, loss: 1.5745154087198898e-05 2023-01-22 08:46:00.647253: step: 268/77, loss: 2.281257366121281e-06 2023-01-22 08:46:01.917323: step: 272/77, loss: 0.00013983561075292528 2023-01-22 08:46:03.182583: step: 276/77, loss: 1.2718570360448211e-05 2023-01-22 08:46:04.428931: step: 280/77, loss: 9.670064901001751e-05 2023-01-22 08:46:05.743284: step: 284/77, loss: 0.0005530952475965023 2023-01-22 08:46:07.029173: step: 288/77, loss: 0.0029514539055526257 2023-01-22 08:46:08.305135: step: 292/77, loss: 0.00041223972220905125 2023-01-22 08:46:09.647636: step: 296/77, loss: 0.016402151435613632 2023-01-22 08:46:10.973242: step: 300/77, loss: 7.928155355330091e-06 2023-01-22 08:46:12.242009: step: 304/77, loss: 0.0235972311347723 2023-01-22 08:46:13.537015: step: 308/77, loss: 0.141937717795372 2023-01-22 08:46:14.822394: step: 312/77, loss: 0.00022633300977759063 2023-01-22 08:46:16.114901: step: 316/77, loss: 4.3762211134890094e-06 2023-01-22 08:46:17.394429: step: 320/77, loss: 2.618359576445073e-05 2023-01-22 08:46:18.716585: step: 324/77, loss: 0.060009852051734924 2023-01-22 08:46:20.083357: step: 328/77, loss: 3.010000000358559e-07 2023-01-22 08:46:21.386896: step: 332/77, loss: 0.0031362068839371204 2023-01-22 08:46:22.698420: step: 336/77, loss: 9.342830935565871e-07 2023-01-22 08:46:23.999802: step: 340/77, loss: 2.3841704432925326e-07 2023-01-22 08:46:25.337318: step: 344/77, loss: 4.756555790663697e-05 2023-01-22 08:46:26.633817: step: 348/77, loss: 0.00014611869119107723 2023-01-22 08:46:27.966005: step: 352/77, loss: 3.068546357098967e-05 2023-01-22 08:46:29.250097: step: 356/77, loss: 0.000274586578598246 2023-01-22 08:46:30.571344: step: 360/77, loss: 2.220268413566373e-07 2023-01-22 08:46:31.864736: step: 364/77, loss: 0.009011710993945599 2023-01-22 08:46:33.159158: step: 368/77, loss: 2.03244894692034e-06 2023-01-22 08:46:34.431974: step: 372/77, loss: 3.11428630084265e-05 2023-01-22 08:46:35.682290: step: 376/77, loss: 8.270069429272553e-07 2023-01-22 08:46:36.937716: step: 380/77, loss: 1.7652571841608733e-05 2023-01-22 08:46:38.227907: step: 384/77, loss: 5.856064717590925e-07 2023-01-22 08:46:39.536508: step: 388/77, loss: 0.05170586705207825 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Chinese: {'template': {'p': 0.9855072463768116, 'r': 0.5190839694656488, 'f1': 0.68}, 'slot': {'p': 0.5, 'r': 0.014453477868112014, 'f1': 0.028094820017559263}, 'combined': 0.0191044776119403, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 
0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Korean: {'template': {'p': 0.9855072463768116, 'r': 0.5190839694656488, 'f1': 0.68}, 'slot': {'p': 0.4375, 'r': 0.012646793134598013, 'f1': 0.024582967515364356}, 'combined': 0.016716417910447763, 'epoch': 25} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Russian: {'template': {'p': 0.9857142857142858, 'r': 0.5267175572519084, 'f1': 0.6865671641791045}, 'slot': {'p': 0.4375, 'r': 0.012646793134598013, 'f1': 0.024582967515364356}, 'combined': 0.01687785829413075, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 26 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 
--learning_rate 2e-4 2023-01-22 08:48:21.146497: step: 4/77, loss: 5.481765583681408e-06 2023-01-22 08:48:22.404355: step: 8/77, loss: 7.747093150101136e-06 2023-01-22 08:48:23.676589: step: 12/77, loss: 7.727561023784801e-05 2023-01-22 08:48:24.961520: step: 16/77, loss: 5.4686453950125724e-05 2023-01-22 08:48:26.177977: step: 20/77, loss: 1.919623537105508e-05 2023-01-22 08:48:27.522700: step: 24/77, loss: 1.2736864846374374e-05 2023-01-22 08:48:28.797673: step: 28/77, loss: 0.00030758618959225714 2023-01-22 08:48:30.111276: step: 32/77, loss: 1.3266368114273064e-05 2023-01-22 08:48:31.394049: step: 36/77, loss: 2.479479917383287e-06 2023-01-22 08:48:32.663494: step: 40/77, loss: 0.00022657167573925108 2023-01-22 08:48:33.961083: step: 44/77, loss: 0.0003512536932248622 2023-01-22 08:48:35.269572: step: 48/77, loss: 0.0001523706887383014 2023-01-22 08:48:36.550510: step: 52/77, loss: 3.3064022773032775e-06 2023-01-22 08:48:37.846217: step: 56/77, loss: 2.327267975488212e-05 2023-01-22 08:48:39.089829: step: 60/77, loss: 4.443985380930826e-05 2023-01-22 08:48:40.410298: step: 64/77, loss: 4.193187123746611e-05 2023-01-22 08:48:41.741876: step: 68/77, loss: 5.6550146837253124e-05 2023-01-22 08:48:43.033951: step: 72/77, loss: 6.289097655098885e-05 2023-01-22 08:48:44.313047: step: 76/77, loss: 6.619805208174512e-05 2023-01-22 08:48:45.615463: step: 80/77, loss: 8.180631994036958e-07 2023-01-22 08:48:46.908371: step: 84/77, loss: 0.0005231473478488624 2023-01-22 08:48:48.212907: step: 88/77, loss: 5.478812818182632e-05 2023-01-22 08:48:49.512400: step: 92/77, loss: 1.8045072920358507e-06 2023-01-22 08:48:50.833340: step: 96/77, loss: 0.06642985343933105 2023-01-22 08:48:52.119379: step: 100/77, loss: 4.917359888167994e-07 2023-01-22 08:48:53.360397: step: 104/77, loss: 1.1980254157606396e-06 2023-01-22 08:48:54.664863: step: 108/77, loss: 4.111695307074115e-05 2023-01-22 08:48:55.951787: step: 112/77, loss: 1.090757336896786e-06 2023-01-22 08:48:57.226263: step: 116/77, loss: 6.899875734234229e-05 2023-01-22 08:48:58.487581: step: 120/77, loss: 0.00800521019846201 2023-01-22 08:48:59.775664: step: 124/77, loss: 3.220645885448903e-05 2023-01-22 08:49:01.067686: step: 128/77, loss: 0.0010096518089994788 2023-01-22 08:49:02.360554: step: 132/77, loss: 2.1828752778674243e-06 2023-01-22 08:49:03.661432: step: 136/77, loss: 7.152478360694658e-07 2023-01-22 08:49:04.969877: step: 140/77, loss: 0.00018325047858525068 2023-01-22 08:49:06.232620: step: 144/77, loss: 1.4789173292228952e-05 2023-01-22 08:49:07.502240: step: 148/77, loss: 0.0003508915542624891 2023-01-22 08:49:08.865414: step: 152/77, loss: 0.021963827311992645 2023-01-22 08:49:10.136927: step: 156/77, loss: 0.0004927744157612324 2023-01-22 08:49:11.427025: step: 160/77, loss: 0.0031166928820312023 2023-01-22 08:49:12.735559: step: 164/77, loss: 4.25084681410226e-06 2023-01-22 08:49:14.036720: step: 168/77, loss: 0.0006452484522014856 2023-01-22 08:49:15.313130: step: 172/77, loss: 7.3308078754052985e-06 2023-01-22 08:49:16.622904: step: 176/77, loss: 3.883035788021516e-06 2023-01-22 08:49:17.941686: step: 180/77, loss: 0.004640428815037012 2023-01-22 08:49:19.247885: step: 184/77, loss: 0.05861657112836838 2023-01-22 08:49:20.565305: step: 188/77, loss: 0.02396177127957344 2023-01-22 08:49:21.803196: step: 192/77, loss: 2.979371856781654e-05 2023-01-22 08:49:23.057876: step: 196/77, loss: 0.00015523187175858766 2023-01-22 08:49:24.407160: step: 200/77, loss: 5.799734481115593e-06 2023-01-22 08:49:25.697046: step: 204/77, loss: 
0.00017246135394088924 2023-01-22 08:49:27.026715: step: 208/77, loss: 0.000321808154694736 2023-01-22 08:49:28.327206: step: 212/77, loss: 6.005029149491747e-07 2023-01-22 08:49:29.601571: step: 216/77, loss: 2.1190633560763672e-05 2023-01-22 08:49:30.900248: step: 220/77, loss: 0.004656767938286066 2023-01-22 08:49:32.181785: step: 224/77, loss: 0.0017464417032897472 2023-01-22 08:49:33.411856: step: 228/77, loss: 0.0005598579300567508 2023-01-22 08:49:34.723693: step: 232/77, loss: 0.00011804819223470986 2023-01-22 08:49:36.026585: step: 236/77, loss: 0.004002984147518873 2023-01-22 08:49:37.331315: step: 240/77, loss: 0.00037862331373617053 2023-01-22 08:49:38.609874: step: 244/77, loss: 0.00023899652296677232 2023-01-22 08:49:39.903816: step: 248/77, loss: 0.001696170074865222 2023-01-22 08:49:41.179898: step: 252/77, loss: 0.00019286083988845348 2023-01-22 08:49:42.443850: step: 256/77, loss: 0.001271429588086903 2023-01-22 08:49:43.733203: step: 260/77, loss: 0.0024752700701355934 2023-01-22 08:49:45.036200: step: 264/77, loss: 0.0002661732432898134 2023-01-22 08:49:46.290903: step: 268/77, loss: 7.176605868153274e-05 2023-01-22 08:49:47.601583: step: 272/77, loss: 0.006966698449105024 2023-01-22 08:49:48.848323: step: 276/77, loss: 0.0004718205891549587 2023-01-22 08:49:50.188868: step: 280/77, loss: 2.8281247068662196e-06 2023-01-22 08:49:51.458007: step: 284/77, loss: 0.00025430243113078177 2023-01-22 08:49:52.767132: step: 288/77, loss: 3.74128330804524e-06 2023-01-22 08:49:54.067552: step: 292/77, loss: 0.002602202817797661 2023-01-22 08:49:55.345593: step: 296/77, loss: 9.79024753178237e-06 2023-01-22 08:49:56.600726: step: 300/77, loss: 9.252102245227434e-06 2023-01-22 08:49:57.861910: step: 304/77, loss: 7.802544860169291e-05 2023-01-22 08:49:59.133353: step: 308/77, loss: 9.53261333052069e-05 2023-01-22 08:50:00.475933: step: 312/77, loss: 1.3244408364698756e-05 2023-01-22 08:50:01.787045: step: 316/77, loss: 0.01857200264930725 2023-01-22 08:50:03.144237: step: 320/77, loss: 9.210885764332488e-06 2023-01-22 08:50:04.404811: step: 324/77, loss: 0.0003791408962570131 2023-01-22 08:50:05.674969: step: 328/77, loss: 2.8768197807949036e-05 2023-01-22 08:50:06.989847: step: 332/77, loss: 1.5513876860495657e-05 2023-01-22 08:50:08.269843: step: 336/77, loss: 0.000338991463650018 2023-01-22 08:50:09.549979: step: 340/77, loss: 0.0005667489604093134 2023-01-22 08:50:10.897983: step: 344/77, loss: 0.003694930812343955 2023-01-22 08:50:12.223420: step: 348/77, loss: 4.037413600599393e-05 2023-01-22 08:50:13.483344: step: 352/77, loss: 0.02028324268758297 2023-01-22 08:50:14.801437: step: 356/77, loss: 3.3076944419008214e-06 2023-01-22 08:50:16.077247: step: 360/77, loss: 9.149199513558415e-07 2023-01-22 08:50:17.416145: step: 364/77, loss: 0.011678007431328297 2023-01-22 08:50:18.696830: step: 368/77, loss: 0.0001280040160054341 2023-01-22 08:50:19.990036: step: 372/77, loss: 9.730198371471488e-07 2023-01-22 08:50:21.291312: step: 376/77, loss: 5.811441994296729e-08 2023-01-22 08:50:22.565826: step: 380/77, loss: 6.0846696214866824e-06 2023-01-22 08:50:23.864054: step: 384/77, loss: 8.547366451239213e-05 2023-01-22 08:50:25.179822: step: 388/77, loss: 0.00029323625494726 ================================================== Loss: 0.003 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 26} Test Chinese: {'template': 
{'p': 0.9696969696969697, 'r': 0.48854961832061067, 'f1': 0.6497461928934011}, 'slot': {'p': 0.3103448275862069, 'r': 0.008130081300813009, 'f1': 0.015845070422535214}, 'combined': 0.01029527418317009, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.04662219154972779, 'epoch': 26} Test Korean: {'template': {'p': 0.9701492537313433, 'r': 0.4961832061068702, 'f1': 0.6565656565656566}, 'slot': {'p': 0.3103448275862069, 'r': 0.008130081300813009, 'f1': 0.015845070422535214}, 'combined': 0.010403329065300898, 'epoch': 26} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.48717948717948717, 'r': 0.035916824196597356, 'f1': 0.06690140845070423}, 'combined': 0.04839676356008391, 'epoch': 26} Test Russian: {'template': {'p': 0.9701492537313433, 'r': 0.4961832061068702, 'f1': 0.6565656565656566}, 'slot': {'p': 0.32142857142857145, 'r': 0.008130081300813009, 'f1': 0.01585903083700441}, 'combined': 0.010412494993992794, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 
0.04301075268817204, 'epoch': 4} ****************************** Epoch: 27 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:52:06.423547: step: 4/77, loss: 2.7505857360665686e-05 2023-01-22 08:52:07.712774: step: 8/77, loss: 0.0011554948287084699 2023-01-22 08:52:09.043018: step: 12/77, loss: 1.7303980712313205e-05 2023-01-22 08:52:10.312153: step: 16/77, loss: 1.1186986739630811e-05 2023-01-22 08:52:11.656450: step: 20/77, loss: 0.0033815372735261917 2023-01-22 08:52:12.965100: step: 24/77, loss: 2.60449974121002e-06 2023-01-22 08:52:14.267023: step: 28/77, loss: 0.004366590641438961 2023-01-22 08:52:15.546625: step: 32/77, loss: 1.6641301044728607e-05 2023-01-22 08:52:16.793875: step: 36/77, loss: 2.533194809473116e-08 2023-01-22 08:52:18.052515: step: 40/77, loss: 0.0004364684864412993 2023-01-22 08:52:19.392013: step: 44/77, loss: 1.937149818331818e-08 2023-01-22 08:52:20.679769: step: 48/77, loss: 1.1250177749388968e-06 2023-01-22 08:52:21.998658: step: 52/77, loss: 0.0012814695946872234 2023-01-22 08:52:23.300250: step: 56/77, loss: 3.8742774677302805e-07 2023-01-22 08:52:24.595290: step: 60/77, loss: 2.1708592612412758e-05 2023-01-22 08:52:25.871488: step: 64/77, loss: 3.951329290430294e-06 2023-01-22 08:52:27.150880: step: 68/77, loss: 1.3965031939733308e-05 2023-01-22 08:52:28.422983: step: 72/77, loss: 4.4106669520260766e-05 2023-01-22 08:52:29.753828: step: 76/77, loss: 0.017457332462072372 2023-01-22 08:52:31.010846: step: 80/77, loss: 5.629240331472829e-05 2023-01-22 08:52:32.301850: step: 84/77, loss: 0.00011551618081284687 2023-01-22 08:52:33.552002: step: 88/77, loss: 5.334536581358407e-07 2023-01-22 08:52:34.858564: step: 92/77, loss: 6.884167760290438e-07 2023-01-22 08:52:36.144297: step: 96/77, loss: 1.5734174667159095e-05 2023-01-22 08:52:37.447200: step: 100/77, loss: 0.00032483390532433987 2023-01-22 08:52:38.713937: step: 104/77, loss: 1.1384234994693543e-06 2023-01-22 08:52:40.057320: step: 108/77, loss: 0.03720640763640404 2023-01-22 08:52:41.322120: step: 112/77, loss: 4.27724517066963e-05 2023-01-22 08:52:42.612985: step: 116/77, loss: 0.00010154680057894439 2023-01-22 08:52:43.910927: step: 120/77, loss: 1.4752107801996317e-07 2023-01-22 08:52:45.208233: step: 124/77, loss: 0.00013078686606604606 2023-01-22 08:52:46.478956: step: 128/77, loss: 2.6672984176911996e-07 2023-01-22 08:52:47.733033: step: 132/77, loss: 0.00040122828795574605 2023-01-22 08:52:48.968796: step: 136/77, loss: 1.206991413482683e-07 2023-01-22 08:52:50.261822: step: 140/77, loss: 2.5793294753384544e-06 2023-01-22 08:52:51.579130: step: 144/77, loss: 2.38417555920023e-07 2023-01-22 08:52:52.873698: step: 148/77, loss: 2.6746415642264765e-06 2023-01-22 08:52:54.127447: step: 152/77, loss: 1.2441074431990273e-05 2023-01-22 08:52:55.426064: step: 156/77, loss: 0.0029361483175307512 2023-01-22 08:52:56.761903: step: 160/77, loss: 0.08445204049348831 2023-01-22 08:52:58.035493: step: 164/77, loss: 0.028432684019207954 2023-01-22 08:52:59.335391: step: 168/77, loss: 6.019993747941044e-07 2023-01-22 08:53:00.622849: step: 172/77, loss: 0.0037985225208103657 2023-01-22 08:53:01.921832: step: 176/77, loss: 3.0629413231508806e-05 2023-01-22 08:53:03.246197: step: 180/77, loss: 3.397369027879904e-06 2023-01-22 08:53:04.539813: step: 184/77, loss: 0.06735138595104218 2023-01-22 08:53:05.849581: step: 188/77, loss: 
2.424426202196628e-05 2023-01-22 08:53:07.111777: step: 192/77, loss: 0.040859125554561615 2023-01-22 08:53:08.401981: step: 196/77, loss: 0.005081044510006905 2023-01-22 08:53:09.758985: step: 200/77, loss: 1.019201590679586e-05 2023-01-22 08:53:11.017616: step: 204/77, loss: 0.00355838006362319 2023-01-22 08:53:12.334014: step: 208/77, loss: 0.030417632311582565 2023-01-22 08:53:13.674425: step: 212/77, loss: 0.0011557607213035226 2023-01-22 08:53:14.971008: step: 216/77, loss: 0.00011477222142275423 2023-01-22 08:53:16.245076: step: 220/77, loss: 0.041373834013938904 2023-01-22 08:53:17.549766: step: 224/77, loss: 0.022509494796395302 2023-01-22 08:53:18.877469: step: 228/77, loss: 0.00010602780093904585 2023-01-22 08:53:20.187119: step: 232/77, loss: 2.339477020996128e-07 2023-01-22 08:53:21.486796: step: 236/77, loss: 0.004461529199033976 2023-01-22 08:53:22.768115: step: 240/77, loss: 2.2500658758417558e-07 2023-01-22 08:53:24.038084: step: 244/77, loss: 1.192092646817855e-08 2023-01-22 08:53:25.332439: step: 248/77, loss: 0.21817706525325775 2023-01-22 08:53:26.684546: step: 252/77, loss: 0.001352312508970499 2023-01-22 08:53:28.002614: step: 256/77, loss: 6.764993258912e-07 2023-01-22 08:53:29.336522: step: 260/77, loss: 0.00010967526759486645 2023-01-22 08:53:30.647174: step: 264/77, loss: 0.008482849225401878 2023-01-22 08:53:32.017416: step: 268/77, loss: 1.469506969442591e-05 2023-01-22 08:53:33.365074: step: 272/77, loss: 0.0027691826689988375 2023-01-22 08:53:34.692172: step: 276/77, loss: 0.0015755686908960342 2023-01-22 08:53:35.995064: step: 280/77, loss: 2.8417249268386513e-05 2023-01-22 08:53:37.300070: step: 284/77, loss: 1.1840852494060528e-05 2023-01-22 08:53:38.611749: step: 288/77, loss: 0.004036055412143469 2023-01-22 08:53:39.897773: step: 292/77, loss: 0.00029007441480644047 2023-01-22 08:53:41.170952: step: 296/77, loss: 0.01541918981820345 2023-01-22 08:53:42.497470: step: 300/77, loss: 0.00037965853698551655 2023-01-22 08:53:43.779553: step: 304/77, loss: 0.00203267065808177 2023-01-22 08:53:45.082202: step: 308/77, loss: 0.008641621097922325 2023-01-22 08:53:46.389157: step: 312/77, loss: 0.0005436694482341409 2023-01-22 08:53:47.642064: step: 316/77, loss: 4.508683105086675e-06 2023-01-22 08:53:48.899588: step: 320/77, loss: 1.2735614291159436e-05 2023-01-22 08:53:50.208272: step: 324/77, loss: 0.0005036424263380468 2023-01-22 08:53:51.525803: step: 328/77, loss: 2.058391328318976e-05 2023-01-22 08:53:52.787323: step: 332/77, loss: 5.881480683456175e-05 2023-01-22 08:53:54.062153: step: 336/77, loss: 6.234457396203652e-05 2023-01-22 08:53:55.363399: step: 340/77, loss: 3.680566464936419e-07 2023-01-22 08:53:56.612808: step: 344/77, loss: 2.4716209736652672e-05 2023-01-22 08:53:57.992895: step: 348/77, loss: 0.006677013821899891 2023-01-22 08:53:59.353981: step: 352/77, loss: 8.443155093118548e-05 2023-01-22 08:54:00.684074: step: 356/77, loss: 0.009522883221507072 2023-01-22 08:54:01.979006: step: 360/77, loss: 1.6093220267521247e-07 2023-01-22 08:54:03.277216: step: 364/77, loss: 0.0004596296639647335 2023-01-22 08:54:04.564533: step: 368/77, loss: 2.1775504137622193e-05 2023-01-22 08:54:05.896052: step: 372/77, loss: 7.584843388031004e-06 2023-01-22 08:54:07.222398: step: 376/77, loss: 0.05923104286193848 2023-01-22 08:54:08.504702: step: 380/77, loss: 0.05634100362658501 2023-01-22 08:54:09.815617: step: 384/77, loss: 0.010661378502845764 2023-01-22 08:54:11.112037: step: 388/77, loss: 0.04150150343775749 ================================================== 
Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 27} Test Chinese: {'template': {'p': 0.935064935064935, 'r': 0.549618320610687, 'f1': 0.6923076923076923}, 'slot': {'p': 0.4, 'r': 0.012646793134598013, 'f1': 0.0245183887915937}, 'combined': 0.01697426916341102, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 27} Test Korean: {'template': {'p': 0.9358974358974359, 'r': 0.5572519083969466, 'f1': 0.6985645933014354}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.018335028695575736, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 27} Test Russian: {'template': {'p': 0.9358974358974359, 'r': 0.5572519083969466, 'f1': 0.6985645933014354}, 'slot': {'p': 0.40540540540540543, 'r': 0.013550135501355014, 'f1': 0.026223776223776227}, 'combined': 0.01831900157259009, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': 
{'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ****************************** Epoch: 28 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:55:54.022745: step: 4/77, loss: 9.64068476605462e-07 2023-01-22 08:55:55.295584: step: 8/77, loss: 2.7865053198183887e-07 2023-01-22 08:55:56.537072: step: 12/77, loss: 1.0566669516265392e-05 2023-01-22 08:55:57.823679: step: 16/77, loss: 1.6584285731369164e-06 2023-01-22 08:55:59.102173: step: 20/77, loss: 0.00015290960436686873 2023-01-22 08:56:00.394381: step: 24/77, loss: 0.003574878443032503 2023-01-22 08:56:01.689855: step: 28/77, loss: 1.589842577232048e-05 2023-01-22 08:56:03.006484: step: 32/77, loss: 3.6132554669165984e-05 2023-01-22 08:56:04.353665: step: 36/77, loss: 0.06018991023302078 2023-01-22 08:56:05.673935: step: 40/77, loss: 1.1510334843478631e-05 2023-01-22 08:56:06.973323: step: 44/77, loss: 0.0002839084481820464 2023-01-22 08:56:08.260787: step: 48/77, loss: 0.006246471311897039 2023-01-22 08:56:09.500675: step: 52/77, loss: 7.348742201429559e-06 2023-01-22 08:56:10.812355: step: 56/77, loss: 0.00037151321885176003 2023-01-22 08:56:12.102273: step: 60/77, loss: 6.909019248269033e-06 2023-01-22 08:56:13.396328: step: 64/77, loss: 0.002263585338369012 2023-01-22 08:56:14.689124: step: 68/77, loss: 5.272985617921222e-06 2023-01-22 08:56:15.963459: step: 72/77, loss: 0.03045172430574894 2023-01-22 08:56:17.278166: step: 76/77, loss: 1.8863595414586598e-06 2023-01-22 08:56:18.578345: step: 80/77, loss: 0.02143908478319645 2023-01-22 08:56:19.895234: step: 84/77, loss: 4.5710938138654456e-05 2023-01-22 08:56:21.196082: step: 88/77, loss: 0.0002125641331076622 2023-01-22 08:56:22.443516: step: 92/77, loss: 5.6164758461818565e-06 2023-01-22 08:56:23.712036: step: 96/77, loss: 0.00024129284429363906 2023-01-22 08:56:24.987405: step: 100/77, loss: 0.006555612199008465 2023-01-22 08:56:26.269790: step: 104/77, loss: 0.008127989247441292 2023-01-22 08:56:27.546128: step: 108/77, loss: 1.2938665349793155e-05 2023-01-22 08:56:28.825431: step: 112/77, loss: 1.1773166988859884e-05 2023-01-22 08:56:30.104314: step: 116/77, loss: 7.165558599808719e-06 2023-01-22 08:56:31.379437: step: 120/77, loss: 0.0068009719252586365 2023-01-22 08:56:32.692581: step: 124/77, loss: 0.004748777486383915 2023-01-22 08:56:34.040683: step: 128/77, loss: 1.4603058673401392e-07 2023-01-22 08:56:35.339034: step: 132/77, loss: 9.514651901554316e-05 2023-01-22 08:56:36.619811: step: 136/77, loss: 0.0013951624277979136 2023-01-22 08:56:37.913452: step: 140/77, loss: 1.3904502338846214e-05 2023-01-22 08:56:39.209574: step: 144/77, loss: 0.014372942969202995 2023-01-22 08:56:40.514714: step: 148/77, loss: 1.192092380364329e-08 2023-01-22 08:56:41.773864: step: 152/77, loss: 0.00029474348411895335 2023-01-22 08:56:43.048952: step: 156/77, loss: 5.219217200647108e-06 2023-01-22 08:56:44.343038: step: 160/77, loss: 4.321329427625642e-08 2023-01-22 08:56:45.624418: step: 164/77, loss: 6.13247902947478e-05 2023-01-22 08:56:46.897736: step: 168/77, loss: 1.291885155296768e-06 2023-01-22 08:56:48.165293: step: 172/77, loss: 0.00014403194654732943 
2023-01-22 08:56:49.504656: step: 176/77, loss: 0.00023046269780024886 2023-01-22 08:56:50.812085: step: 180/77, loss: 1.6256115031865193e-06 2023-01-22 08:56:52.106600: step: 184/77, loss: 6.258481732857035e-08 2023-01-22 08:56:53.379035: step: 188/77, loss: 3.6979461128794355e-06 2023-01-22 08:56:54.652565: step: 192/77, loss: 0.001247554668225348 2023-01-22 08:56:55.955748: step: 196/77, loss: 0.00020255711569916457 2023-01-22 08:56:57.267892: step: 200/77, loss: 0.00019485583470668644 2023-01-22 08:56:58.569226: step: 204/77, loss: 4.544795331185014e-07 2023-01-22 08:56:59.838487: step: 208/77, loss: 3.360617483849637e-05 2023-01-22 08:57:01.110712: step: 212/77, loss: 0.034614551812410355 2023-01-22 08:57:02.473189: step: 216/77, loss: 0.0002035450015682727 2023-01-22 08:57:03.762946: step: 220/77, loss: 0.0003514665877446532 2023-01-22 08:57:05.077799: step: 224/77, loss: 4.072986121173017e-05 2023-01-22 08:57:06.400745: step: 228/77, loss: 3.471943159638613e-07 2023-01-22 08:57:07.714455: step: 232/77, loss: 0.00031286414014175534 2023-01-22 08:57:09.030321: step: 236/77, loss: 0.0006258451612666249 2023-01-22 08:57:10.283548: step: 240/77, loss: 6.650136583630228e-06 2023-01-22 08:57:11.559018: step: 244/77, loss: 2.9312335755093955e-05 2023-01-22 08:57:12.916056: step: 248/77, loss: 1.4918949091224931e-05 2023-01-22 08:57:14.195499: step: 252/77, loss: 7.763389362480666e-07 2023-01-22 08:57:15.498852: step: 256/77, loss: 1.8924350797533407e-07 2023-01-22 08:57:16.833351: step: 260/77, loss: 6.862991722300649e-05 2023-01-22 08:57:18.201782: step: 264/77, loss: 5.841187658006675e-07 2023-01-22 08:57:19.497334: step: 268/77, loss: 0.015060674399137497 2023-01-22 08:57:20.798914: step: 272/77, loss: 2.6076759240822867e-07 2023-01-22 08:57:22.120637: step: 276/77, loss: 1.5377199815702625e-06 2023-01-22 08:57:23.350777: step: 280/77, loss: 7.972068942763144e-07 2023-01-22 08:57:24.645828: step: 284/77, loss: 1.070262078428641e-05 2023-01-22 08:57:25.978855: step: 288/77, loss: 0.00015180776244960725 2023-01-22 08:57:27.286849: step: 292/77, loss: 0.0358809158205986 2023-01-22 08:57:28.577332: step: 296/77, loss: 4.167438873992069e-06 2023-01-22 08:57:29.886713: step: 300/77, loss: 2.512177843527752e-06 2023-01-22 08:57:31.206009: step: 304/77, loss: 0.04607342183589935 2023-01-22 08:57:32.505162: step: 308/77, loss: 1.1491039003885817e-05 2023-01-22 08:57:33.733096: step: 312/77, loss: 8.81724372447934e-06 2023-01-22 08:57:35.040553: step: 316/77, loss: 1.818268356146291e-05 2023-01-22 08:57:36.361369: step: 320/77, loss: 0.00010696750541683286 2023-01-22 08:57:37.640870: step: 324/77, loss: 0.0004945929395034909 2023-01-22 08:57:38.966667: step: 328/77, loss: 0.000450802588602528 2023-01-22 08:57:40.242272: step: 332/77, loss: 0.00888136588037014 2023-01-22 08:57:41.507369: step: 336/77, loss: 9.238691234259022e-08 2023-01-22 08:57:42.780543: step: 340/77, loss: 0.000297154561849311 2023-01-22 08:57:44.137488: step: 344/77, loss: 7.912410637800349e-07 2023-01-22 08:57:45.403266: step: 348/77, loss: 4.245068339514546e-06 2023-01-22 08:57:46.681668: step: 352/77, loss: 9.834380989559577e-07 2023-01-22 08:57:47.946045: step: 356/77, loss: 0.015564743429422379 2023-01-22 08:57:49.277406: step: 360/77, loss: 0.0002977493277285248 2023-01-22 08:57:50.599604: step: 364/77, loss: 2.9537855880334973e-05 2023-01-22 08:57:51.892270: step: 368/77, loss: 8.240547322202474e-05 2023-01-22 08:57:53.160256: step: 372/77, loss: 0.0 2023-01-22 08:57:54.467997: step: 376/77, loss: 0.001022365759126842 
2023-01-22 08:57:55.804654: step: 380/77, loss: 2.960517349492875e-06 2023-01-22 08:57:57.167734: step: 384/77, loss: 1.3913253496866673e-05 2023-01-22 08:57:58.448760: step: 388/77, loss: 0.00022609457664657384 ================================================== Loss: 0.003 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 28} Test Chinese: {'template': {'p': 0.9436619718309859, 'r': 0.5114503816793893, 'f1': 0.6633663366336633}, 'slot': {'p': 0.42857142857142855, 'r': 0.013550135501355014, 'f1': 0.02626970227670753}, 'combined': 0.017426436163756477, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 28} Test Korean: {'template': {'p': 0.9436619718309859, 'r': 0.5114503816793893, 'f1': 0.6633663366336633}, 'slot': {'p': 0.42424242424242425, 'r': 0.012646793134598013, 'f1': 0.024561403508771933}, 'combined': 0.01629320826819524, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 28} Test Russian: {'template': {'p': 0.9577464788732394, 'r': 0.5190839694656488, 'f1': 0.6732673267326732}, 'slot': {'p': 0.4411764705882353, 'r': 0.013550135501355014, 'f1': 0.02629272567922875}, 'combined': 0.01770203313056985, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 28} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3} Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} -------------------- 
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4}
Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4}
******************************
Epoch: 29
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-22 08:59:41.044805: step: 4/77, loss: 7.4505797087454084e-09
2023-01-22 08:59:42.326623: step: 8/77, loss: 6.965329521335661e-05
2023-01-22 08:59:43.639293: step: 12/77, loss: 0.03225981444120407
2023-01-22 08:59:44.907368: step: 16/77, loss: 5.960463234089275e-09
2023-01-22 08:59:46.207941: step: 20/77, loss: 0.0004558908985927701
2023-01-22 08:59:47.484466: step: 24/77, loss: 0.0009515452547930181
2023-01-22 08:59:48.777508: step: 28/77, loss: 0.012123556807637215
2023-01-22 08:59:50.078557: step: 32/77, loss: 3.725286745748235e-08
2023-01-22 08:59:51.424861: step: 36/77, loss: 9.973219857783988e-05
2023-01-22 08:59:52.750434: step: 40/77, loss: 4.164739584666677e-05
2023-01-22 08:59:54.078588: step: 44/77, loss: 0.0001398369495291263
2023-01-22 08:59:55.324800: step: 48/77, loss: 0.0
2023-01-22 08:59:56.617987: step: 52/77, loss: 0.002460264600813389
2023-01-22 08:59:57.905489: step: 56/77, loss: 6.1242428728292e-07
2023-01-22 08:59:59.207649: step: 60/77, loss: 1.2546159950943547e-06
2023-01-22 09:00:00.527030: step: 64/77, loss: 0.00239675329066813
2023-01-22 09:00:01.880470: step: 68/77, loss: 1.627564779482782e-05
2023-01-22 09:00:03.207356: step: 72/77, loss: 1.3729930287809111e-05
2023-01-22 09:00:04.504088: step: 76/77, loss: 0.00010529484279686585
2023-01-22 09:00:05.775955: step: 80/77, loss: 3.3361757232341915e-05
2023-01-22 09:00:07.026099: step: 84/77, loss: 0.002414719434455037
2023-01-22 09:00:08.322537: step: 88/77, loss: 5.140843768458581e-07
2023-01-22 09:00:09.644829: step: 92/77, loss: 0.20920942723751068
2023-01-22 09:00:10.940442: step: 96/77, loss: 1.2781938494299538e-05
2023-01-22 09:00:12.233120: step: 100/77, loss: 0.0014965697191655636
2023-01-22 09:00:13.513510: step: 104/77, loss: 1.2248308394191554e-06
2023-01-22 09:00:14.769038: step: 108/77, loss: 0.029241599142551422
2023-01-22 09:00:16.050342: step: 112/77, loss: 1.4289505543274572e-06
2023-01-22 09:00:17.306684: step: 116/77, loss: 1.4232809917302802e-05
2023-01-22 09:00:18.600075: step: 120/77, loss: 4.231874015658832e-07
2023-01-22 09:00:19.935230: step: 124/77, loss: 2.9802318390892424e-09
2023-01-22 09:00:21.210406: step: 128/77, loss: 1.923632453326718e-06
2023-01-22 09:00:22.483988: step: 132/77, loss: 5.751722937930026e-07
2023-01-22 09:00:23.739840: step: 136/77, loss: 5.915730412198172e-07
2023-01-22 09:00:25.027942: step: 140/77, loss: 0.00011380821524653584
2023-01-22 09:00:26.331157: step: 144/77, loss: 8.085298759397119e-05
2023-01-22 09:00:27.638652: step: 148/77, loss: 0.00672421557828784
2023-01-22 09:00:28.955138: step: 152/77, loss: 1.7091322206397308e-06
2023-01-22 09:00:30.231600: step: 156/77, loss: 1.7865913832793012e-06
2023-01-22 09:00:31.502309: step: 160/77, loss: 7.068046397762373e-05
2023-01-22 09:00:32.811958: step: 164/77, loss: 7.897369869169779e-07
2023-01-22 09:00:34.168913: step: 168/77, loss: 3.203735445822531e-07
2023-01-22 09:00:35.464905: step: 172/77, loss: 2.61657555711281e-06
2023-01-22 09:00:36.731470: step: 176/77, loss: 3.048412054340588e-06
2023-01-22 09:00:37.987662: step: 180/77, loss: 5.527518624148797e-06
2023-01-22 09:00:39.261320: step: 184/77, loss: 3.007160012202803e-05
2023-01-22 09:00:40.603504: step: 188/77, loss: 2.419860720692668e-06
2023-01-22 09:00:41.910414: step: 192/77, loss: 7.220292172860354e-05
2023-01-22 09:00:43.212616: step: 196/77, loss: 2.734985901042819e-05
2023-01-22 09:00:44.520587: step: 200/77, loss: 2.2843109945824835e-06
2023-01-22 09:00:45.812034: step: 204/77, loss: 0.00011313649883959442
2023-01-22 09:00:47.142771: step: 208/77, loss: 0.00021175568690523505
2023-01-22 09:00:48.424851: step: 212/77, loss: 3.129242287513989e-08
2023-01-22 09:00:49.717025: step: 216/77, loss: 6.079644663259387e-07
2023-01-22 09:00:51.046933: step: 220/77, loss: 0.0003149434342049062
2023-01-22 09:00:52.372501: step: 224/77, loss: 0.0005252505652606487
2023-01-22 09:00:53.598951: step: 228/77, loss: 0.0004992748727090657
2023-01-22 09:00:54.914442: step: 232/77, loss: 0.014604151248931885
2023-01-22 09:00:56.198673: step: 236/77, loss: 1.513875190539693e-06
2023-01-22 09:00:57.532946: step: 240/77, loss: 9.912311725202017e-06
2023-01-22 09:00:58.780910: step: 244/77, loss: 1.54368092353252e-06
2023-01-22 09:01:00.108073: step: 248/77, loss: 0.01198370661586523
2023-01-22 09:01:01.389153: step: 252/77, loss: 0.00092380988644436
2023-01-22 09:01:02.698383: step: 256/77, loss: 3.4029815196845448e-06
2023-01-22 09:01:04.038820: step: 260/77, loss: 0.00012798003444913775
2023-01-22 09:01:05.346180: step: 264/77, loss: 8.236154826590791e-06
2023-01-22 09:01:06.664296: step: 268/77, loss: 0.0007766756461933255
2023-01-22 09:01:07.939818: step: 272/77, loss: 0.012719300575554371
2023-01-22 09:01:09.229462: step: 276/77, loss: 0.0002160976582672447
2023-01-22 09:01:10.514842: step: 280/77, loss: 0.12471464276313782
2023-01-22 09:01:11.797887: step: 284/77, loss: 4.7619501856388524e-05
2023-01-22 09:01:13.110456: step: 288/77, loss: 3.392129292478785e-05
2023-01-22 09:01:14.452244: step: 292/77, loss: 1.928166739162407e-06
2023-01-22 09:01:15.767719: step: 296/77, loss: 0.048692576587200165
2023-01-22 09:01:17.042756: step: 300/77, loss: 0.0004496439069043845
2023-01-22 09:01:18.334484: step: 304/77, loss: 0.009658782742917538
2023-01-22 09:01:19.607796: step: 308/77, loss: 4.896618702332489e-05
2023-01-22 09:01:20.886802: step: 312/77, loss: 0.018360355868935585
2023-01-22 09:01:22.186028: step: 316/77, loss: 1.70316445746721e-06
2023-01-22 09:01:23.473075: step: 320/77, loss: 5.35187200512155e-06
2023-01-22 09:01:24.792517: step: 324/77, loss: 0.0013014284195378423
2023-01-22 09:01:26.102831: step: 328/77, loss: 6.562308044522069e-06
2023-01-22 09:01:27.410947: step: 332/77, loss: 2.9375285521382466e-05
2023-01-22 09:01:28.741057: step: 336/77, loss: 7.343215111177415e-05
2023-01-22 09:01:30.063324: step: 340/77, loss: 0.0003839013515971601
2023-01-22 09:01:31.348312: step: 344/77, loss: 5.2280905947554857e-05
2023-01-22 09:01:32.651761: step: 348/77, loss: 0.0002265849761897698
2023-01-22 09:01:34.008790: step: 352/77, loss: 0.00012757748481817544
2023-01-22 09:01:35.328830: step: 356/77, loss: 0.0004892974975518882
2023-01-22 09:01:36.681861: step: 360/77, loss: 0.04070931300520897
2023-01-22 09:01:37.929913: step: 364/77, loss: 0.00010159768862649798
2023-01-22 09:01:39.228187: step: 368/77, loss: 0.0039003193378448486
2023-01-22 09:01:40.530492: step: 372/77, loss: 0.0008470588945783675
2023-01-22 09:01:41.834961: step: 376/77, loss: 0.03480779007077217
2023-01-22 09:01:43.111370: step: 380/77, loss: 6.759603274986148e-05
2023-01-22 09:01:44.411391: step: 384/77, loss: 0.03037024661898613
2023-01-22 09:01:45.707165: step: 388/77, loss: 7.473508958355524e-06
==================================================
Loss: 0.007
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 29}
Test Chinese: {'template': {'p': 0.9436619718309859, 'r': 0.5114503816793893, 'f1': 0.6633663366336633}, 'slot': {'p': 0.45161290322580644, 'r': 0.012646793134598013, 'f1': 0.024604569420035152}, 'combined': 0.016321843080617376, 'epoch': 29}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 29}
Test Korean: {'template': {'p': 0.9436619718309859, 'r': 0.5114503816793893, 'f1': 0.6633663366336633}, 'slot': {'p': 0.43333333333333335, 'r': 0.011743450767841012, 'f1': 0.022867194371152158}, 'combined': 0.015169326959081133, 'epoch': 29}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05085442919642522, 'epoch': 29}
Test Russian: {'template': {'p': 0.9305555555555556, 'r': 0.5114503816793893, 'f1': 0.6600985221674877}, 'slot': {'p': 0.45161290322580644, 'r': 0.012646793134598013, 'f1': 0.024604569420035152}, 'combined': 0.016241439912732563, 'epoch': 29}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 29}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29}
Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29}
==================================================
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3}
Test for Chinese: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.39473684210526316, 'r': 0.013550135501355014, 'f1': 0.026200873362445413}, 'combined': 0.01676855895196506, 'epoch': 3}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 3}
Test for Korean: {'template': {'p': 0.927536231884058, 'r': 0.48854961832061067, 'f1': 0.6399999999999999}, 'slot': {'p': 0.4166666666666667, 'r': 0.013550135501355014, 'f1': 0.02624671916010499}, 'combined': 0.01679790026246719, 'epoch': 3}
Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4}
Test for Russian: {'template': {'p': 0.9382716049382716, 'r': 0.5801526717557252, 'f1': 0.7169811320754718}, 'slot': {'p': 0.4, 'r': 0.009033423667570008, 'f1': 0.0176678445229682}, 'combined': 0.012667511167411162, 'epoch': 4}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4}
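
Note: the 'combined' value in each result dict above is consistent with the product of the template F1 and the slot F1, where each F1 is the usual harmonic mean of the logged precision (p) and recall (r). The sketch below is a minimal reconstruction checked only against the numbers in this log; the function names are illustrative and not taken from train.py.

def f1(p: float, r: float) -> float:
    # Standard F1: harmonic mean of precision and recall.
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

def combined_score(template: dict, slot: dict) -> float:
    # 'combined' appears to be template F1 multiplied by slot F1.
    return f1(template['p'], template['r']) * f1(slot['p'], slot['r'])

# Example: "Dev for Russian" (epoch 4) above.
template = {'p': 1.0, 'r': 0.5833333333333334}
slot = {'p': 0.5, 'r': 0.03780718336483932}
print(combined_score(template, slot))  # ~0.0517990935, matching the logged 'combined'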
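
Note: the "Current best result" block reports a different best epoch per language (epoch 3 for Chinese and Korean, epoch 4 for Russian), which suggests the script keeps the best checkpoint per language according to the dev 'combined' score (the epoch-29 dev score of 0.0509 does not beat the stored 0.0518). A hedged sketch of that bookkeeping, with hypothetical names not drawn from train.py:

# Hypothetical per-language best tracking keyed on the dev 'combined' score.
best_results = {}  # language -> {'dev': ..., 'test': ..., 'sample': ..., 'epoch': ...}

def update_best(language: str, dev: dict, test: dict, sample: dict, epoch: int) -> None:
    prev = best_results.get(language)
    if prev is None or dev['combined'] > prev['dev']['combined']:
        best_results[language] = {'dev': dev, 'test': test, 'sample': sample, 'epoch': epoch}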