Command that produces this log: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
----------------------------------------------------------------------------------------------------
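Note on the flags above: two learning rates are passed (--xlmr_learning_rate 2e-5 and --learning_rate 2e-4), which suggests the XLM-R encoder and the task heads sit in separate optimizer parameter groups, and --batch_size 10 with --accumulate_step 4 implies one optimizer update per 40 examples. The sketch below shows one common way to wire that up in PyTorch; it is only an illustration under those assumptions, and the helper names (build_optimizer, train_epoch) and the args fields are hypothetical, not taken from train.py.

```python
import torch

def build_optimizer(model, args):
    # Two parameter groups: encoder parameters (names start with "xlmr." in the
    # dump below) get the small LR, everything else gets the larger LR.
    xlmr_params = [p for n, p in model.named_parameters() if n.startswith("xlmr.")]
    head_params = [p for n, p in model.named_parameters() if not n.startswith("xlmr.")]
    return torch.optim.AdamW([
        {"params": xlmr_params, "lr": args.xlmr_learning_rate},  # e.g. 2e-5
        {"params": head_params, "lr": args.learning_rate},       # e.g. 2e-4
    ])

def train_epoch(model, loader, optimizer, accumulate_step=4):
    # Gradient accumulation: batches of 10 examples, one update every 4 batches.
    model.train()
    optimizer.zero_grad()
    for i, batch in enumerate(loader, start=1):
        loss = model(**batch)                 # assumes the model returns a scalar loss
        (loss / accumulate_step).backward()   # scale so accumulated gradients average
        if i % accumulate_step == 0:
            optimizer.step()
            optimizer.zero_grad()
```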
> trainable params:
>>> xlmr.embeddings.word_embeddings.weight: torch.Size([250002, 1024])
>>> xlmr.embeddings.position_embeddings.weight: torch.Size([514, 1024])
>>> xlmr.embeddings.token_type_embeddings.weight: torch.Size([1, 1024])
>>> xlmr.embeddings.LayerNorm.weight: torch.Size([1024])
>>> xlmr.embeddings.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.self.query.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.0.attention.self.query.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.self.key.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.0.attention.self.key.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.self.value.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.0.attention.self.value.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.output.dense.weight: torch.Size([1024, 1024])
>>> xlmr.encoder.layer.0.attention.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.intermediate.dense.weight: torch.Size([4096, 1024])
>>> xlmr.encoder.layer.0.intermediate.dense.bias: torch.Size([4096])
>>> xlmr.encoder.layer.0.output.dense.weight: torch.Size([1024, 4096])
>>> xlmr.encoder.layer.0.output.dense.bias: torch.Size([1024])
>>> xlmr.encoder.layer.0.output.LayerNorm.weight: torch.Size([1024])
>>> xlmr.encoder.layer.0.output.LayerNorm.bias: torch.Size([1024])
[xlmr.encoder.layer.1 through xlmr.encoder.layer.23 repeat the layer-0 parameter names and shapes, for 24 encoder layers in total]
>>> xlmr.pooler.dense.weight: torch.Size([1024, 1024])
>>> xlmr.pooler.dense.bias: torch.Size([1024])
>>> trans_rep.weight: torch.Size([1024, 2048])
>>> trans_rep.bias: torch.Size([1024])
[each of the following heads is instantiated once per template type: Corruplate, Cybercrimeplate, Disasterplate, Displacementplate, Epidemiplate, Etiplate, Protestplate, Terrorplate; shapes are identical across templates]
>>> hidden_ffns.<template>.layers.0.weight: torch.Size([768, 1024]); .layers.0.bias: torch.Size([768])
>>> template_classifiers.<template>.layers.0.weight: torch.Size([450, 768]); .layers.0.bias: torch.Size([450]); .layers.1.weight: torch.Size([2, 450]); .layers.1.bias: torch.Size([2])
>>> type_classifiers.<template>.layers.0.weight: torch.Size([450, 768]); .layers.0.bias: torch.Size([450]); .layers.1.weight: torch.Size([6, 450]); .layers.1.bias: torch.Size([6])
>>> completion_classifiers.<template>.layers.0.weight: torch.Size([450, 768]); .layers.0.bias: torch.Size([450]); .layers.1.weight: torch.Size([4, 450]); .layers.1.bias: torch.Size([4])
>>> overtime_classifiers.<template>.layers.0.weight: torch.Size([450, 768]); .layers.0.bias: torch.Size([450]); .layers.1.weight: torch.Size([2, 450]); .layers.1.bias: torch.Size([2])
>>> coordinated_classifiers.<template>.layers.0.weight: torch.Size([450, 768]); .layers.0.bias: torch.Size([450]); .layers.1.weight: torch.Size([2, 450]); .layers.1.bias: torch.Size([2])
n_trainable_params: 582185936, n_nontrainable_params: 0
----------------------------------------------------------------------------------------------------
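As a consistency check, the shapes listed above sum to 582,185,936 trainable parameters (559,890,432 from the XLM-R-large encoder, embeddings and pooler, plus 22,295,504 from trans_rep and the per-template heads), matching the n_trainable_params figure. A dump in exactly this format can be produced with a loop like the following minimal sketch; model here is a placeholder for the instantiated network, and the actual logging code in train.py may differ.

```python
def print_trainable_params(model):
    # Reproduces the "> trainable params:" dump above: one line per parameter
    # tensor with its shape, followed by trainable / non-trainable totals.
    print("> trainable params:")
    n_trainable, n_nontrainable = 0, 0
    for name, param in model.named_parameters():
        if param.requires_grad:
            print(f">>> {name}: {param.shape}")
            n_trainable += param.numel()
        else:
            n_nontrainable += param.numel()
    print(f"n_trainable_params: {n_trainable}, n_nontrainable_params: {n_nontrainable}")
```

With every parameter left trainable (requires_grad=True), such a loop yields the 582185936 / 0 split reported in this log.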
****************************** Epoch: 0
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4
2023-01-22 07:32:14.481307: step: 4/77, loss: 1.0471444129943848
2023-01-22 07:32:15.785317: step: 8/77, loss: 1.0665420293807983
2023-01-22 07:32:17.050086: step: 12/77, loss: 1.0573136806488037
2023-01-22 07:32:18.339764: step: 16/77, loss: 1.0555137395858765
2023-01-22 07:32:19.633522: step: 20/77, loss: 1.0451674461364746
2023-01-22 07:32:20.951872: step: 24/77, loss: 1.049659252166748
2023-01-22 07:32:22.281434: step: 28/77, loss: 1.0589720010757446
2023-01-22 07:32:23.587533: step: 32/77, loss: 1.0422852039337158
2023-01-22 07:32:24.881699: step: 36/77, loss: 1.0360052585601807
2023-01-22 07:32:26.188712: step: 40/77, loss: 1.028542399406433
2023-01-22 07:32:27.467052: step: 44/77, loss: 1.021812915802002
2023-01-22 07:32:28.734730: step: 48/77, loss: 1.0086398124694824
2023-01-22 07:32:30.040471: step: 52/77, loss: 1.0082159042358398
2023-01-22 07:32:31.314652: step: 56/77, loss: 0.9977434873580933
2023-01-22 07:32:32.612721: step: 60/77, loss: 0.9916622638702393
2023-01-22 07:32:33.928634: step: 64/77, loss: 0.9763437509536743
2023-01-22 07:32:35.149434: step: 68/77, loss: 0.9689540863037109
2023-01-22 07:32:36.476761: step: 72/77, loss: 0.9549002647399902
2023-01-22 07:32:37.804714: step: 76/77, loss: 0.934126615524292
2023-01-22 07:32:39.130313: step: 80/77, loss: 0.9254927039146423
2023-01-22 07:32:40.388772: step: 84/77, loss: 0.9223530292510986
2023-01-22 07:32:41.709283: step: 88/77, loss: 0.8978661298751831
2023-01-22 07:32:43.020518: step: 92/77, loss: 0.8742111921310425
2023-01-22 07:32:44.336430: step: 96/77, loss: 0.8754185438156128
2023-01-22 07:32:45.641635: step: 100/77, loss: 0.8745602369308472
2023-01-22 07:32:46.957653: step: 104/77, loss: 0.8353378772735596
2023-01-22 07:32:48.267022: step: 108/77, loss: 0.8197598457336426
2023-01-22 07:32:49.590473: step: 112/77, loss: 0.8277198076248169
2023-01-22 07:32:50.806044: step: 116/77, loss: 0.7971259355545044
2023-01-22 07:32:52.113516: step: 120/77, loss: 0.7589684724807739
2023-01-22 07:32:53.424426: step: 124/77, loss: 0.7603246569633484
2023-01-22 07:32:54.728684: step: 128/77, loss: 0.7163886427879333
2023-01-22 07:32:56.085217: step: 132/77, loss: 0.7048380970954895
2023-01-22 07:32:57.381502: step: 136/77, loss: 0.690535306930542
2023-01-22 07:32:58.735404: step: 140/77, loss: 0.6806949973106384
2023-01-22 07:33:00.056743: step: 144/77, loss: 0.6635257005691528
2023-01-22 07:33:01.384840: step: 148/77, loss: 0.6138787269592285
2023-01-22 07:33:02.747225: step: 152/77, loss: 0.5696084499359131
2023-01-22 07:33:04.060021: step: 156/77, loss: 0.5784422755241394
2023-01-22 07:33:05.402515: step: 160/77, loss: 0.6332916021347046
2023-01-22 07:33:06.720167: step: 164/77, loss: 0.505962073802948
2023-01-22 07:33:08.083354: step: 168/77, loss: 0.5323714017868042
2023-01-22 07:33:09.412318: step: 172/77, loss: 0.4151668846607208
2023-01-22 07:33:10.712350: step: 176/77, loss: 0.40285319089889526
2023-01-22 07:33:12.020613: step: 180/77, loss: 0.39407557249069214
2023-01-22 07:33:13.307873: step: 184/77, loss: 0.4074208736419678
2023-01-22 07:33:14.693613: step: 188/77, loss: 0.4065112769603729
2023-01-22 07:33:15.950799: step: 192/77, loss: 0.37398186326026917
2023-01-22 07:33:17.231627: step: 196/77, loss: 0.3212049603462219
2023-01-22 07:33:18.521015: step: 200/77, loss: 0.30345451831817627
2023-01-22 07:33:19.843761: step: 204/77, loss: 0.4757692813873291
2023-01-22 07:33:21.120216: step:
208/77, loss: 0.2263353168964386 2023-01-22 07:33:22.388045: step: 212/77, loss: 0.26267728209495544 2023-01-22 07:33:23.664468: step: 216/77, loss: 0.17210954427719116 2023-01-22 07:33:24.951564: step: 220/77, loss: 0.2977479100227356 2023-01-22 07:33:26.250572: step: 224/77, loss: 0.1835707575082779 2023-01-22 07:33:27.533515: step: 228/77, loss: 0.3345186412334442 2023-01-22 07:33:28.832872: step: 232/77, loss: 0.13545027375221252 2023-01-22 07:33:30.143749: step: 236/77, loss: 0.12607897818088531 2023-01-22 07:33:31.469112: step: 240/77, loss: 0.11827825009822845 2023-01-22 07:33:32.776045: step: 244/77, loss: 0.12457980215549469 2023-01-22 07:33:34.081297: step: 248/77, loss: 0.15995153784751892 2023-01-22 07:33:35.375081: step: 252/77, loss: 0.3346996009349823 2023-01-22 07:33:36.675124: step: 256/77, loss: 0.22116169333457947 2023-01-22 07:33:37.961601: step: 260/77, loss: 0.10209763050079346 2023-01-22 07:33:39.276608: step: 264/77, loss: 0.07830449193716049 2023-01-22 07:33:40.559044: step: 268/77, loss: 0.05993305519223213 2023-01-22 07:33:41.829850: step: 272/77, loss: 0.1856289505958557 2023-01-22 07:33:43.117810: step: 276/77, loss: 0.11337044090032578 2023-01-22 07:33:44.416389: step: 280/77, loss: 0.12034044414758682 2023-01-22 07:33:45.733900: step: 284/77, loss: 0.12572534382343292 2023-01-22 07:33:47.080498: step: 288/77, loss: 0.1536373645067215 2023-01-22 07:33:48.361974: step: 292/77, loss: 0.04774583876132965 2023-01-22 07:33:49.626182: step: 296/77, loss: 0.13813336193561554 2023-01-22 07:33:50.977839: step: 300/77, loss: 0.06406290829181671 2023-01-22 07:33:52.249799: step: 304/77, loss: 0.09071193635463715 2023-01-22 07:33:53.578931: step: 308/77, loss: 0.08882340788841248 2023-01-22 07:33:54.908083: step: 312/77, loss: 0.03451357036828995 2023-01-22 07:33:56.150739: step: 316/77, loss: 0.05280934274196625 2023-01-22 07:33:57.455594: step: 320/77, loss: 0.0957951694726944 2023-01-22 07:33:58.811387: step: 324/77, loss: 0.09612944722175598 2023-01-22 07:34:00.084280: step: 328/77, loss: 0.09476247429847717 2023-01-22 07:34:01.363739: step: 332/77, loss: 0.08481978625059128 2023-01-22 07:34:02.627527: step: 336/77, loss: 0.39117416739463806 2023-01-22 07:34:03.973847: step: 340/77, loss: 0.14474590122699738 2023-01-22 07:34:05.264007: step: 344/77, loss: 0.10986341536045074 2023-01-22 07:34:06.597130: step: 348/77, loss: 0.03673511743545532 2023-01-22 07:34:07.888220: step: 352/77, loss: 0.0850902646780014 2023-01-22 07:34:09.213023: step: 356/77, loss: 0.09250978380441666 2023-01-22 07:34:10.524861: step: 360/77, loss: 0.3991732597351074 2023-01-22 07:34:11.851111: step: 364/77, loss: 0.13359756767749786 2023-01-22 07:34:13.158161: step: 368/77, loss: 0.20494695007801056 2023-01-22 07:34:14.444383: step: 372/77, loss: 0.08006304502487183 2023-01-22 07:34:15.787590: step: 376/77, loss: 0.0816473588347435 2023-01-22 07:34:17.091117: step: 380/77, loss: 0.12702281773090363 2023-01-22 07:34:18.403386: step: 384/77, loss: 0.05596127733588219 2023-01-22 07:34:19.785146: step: 388/77, loss: 0.12373457849025726 ================================================== Loss: 0.479 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test Chinese: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} 
Test Korean: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test Russian: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} New best chinese model... New best korean model... New best russian model... ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Chinese: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 1 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:36:30.154552: step: 4/77, loss: 0.21097904443740845 2023-01-22 07:36:31.465602: step: 8/77, loss: 0.12158848345279694 2023-01-22 07:36:32.816436: step: 12/77, loss: 0.08410963416099548 2023-01-22 07:36:34.085167: step: 16/77, loss: 0.09124782681465149 2023-01-22 07:36:35.399804: step: 20/77, loss: 0.08149276673793793 2023-01-22 07:36:36.683976: step: 24/77, loss: 0.047875721007585526 2023-01-22 07:36:37.993333: step: 28/77, loss: 0.07582291960716248 2023-01-22 07:36:39.299679: step: 32/77, loss: 0.05457703024148941 2023-01-22 07:36:40.583970: step: 36/77, loss: 0.05679886043071747 2023-01-22 07:36:41.850553: step: 40/77, loss: 0.28963497281074524 2023-01-22 07:36:43.198591: step: 44/77, loss: 0.04498041421175003 2023-01-22 07:36:44.470137: step: 48/77, loss: 0.07296052575111389 2023-01-22 07:36:45.772974: step: 52/77, loss: 0.07825222611427307 2023-01-22 07:36:47.021951: step: 56/77, loss: 0.07049486041069031 2023-01-22 07:36:48.308583: step: 60/77, loss: 0.05149099975824356 2023-01-22 07:36:49.619249: step: 64/77, loss: 0.10428111255168915 2023-01-22 07:36:50.936921: step: 68/77, loss: 
0.15299206972122192 2023-01-22 07:36:52.222603: step: 72/77, loss: 0.07444358617067337 2023-01-22 07:36:53.549973: step: 76/77, loss: 0.10790640115737915 2023-01-22 07:36:54.826249: step: 80/77, loss: 0.08597603440284729 2023-01-22 07:36:56.152064: step: 84/77, loss: 0.12119434773921967 2023-01-22 07:36:57.482241: step: 88/77, loss: 0.08092857897281647 2023-01-22 07:36:58.816973: step: 92/77, loss: 0.3239857852458954 2023-01-22 07:37:00.117428: step: 96/77, loss: 0.07001736015081406 2023-01-22 07:37:01.426162: step: 100/77, loss: 0.06111065298318863 2023-01-22 07:37:02.709901: step: 104/77, loss: 0.09189890325069427 2023-01-22 07:37:03.992942: step: 108/77, loss: 0.05465451627969742 2023-01-22 07:37:05.330568: step: 112/77, loss: 0.03947276249527931 2023-01-22 07:37:06.619578: step: 116/77, loss: 0.0742819756269455 2023-01-22 07:37:07.899769: step: 120/77, loss: 0.0826253667473793 2023-01-22 07:37:09.230398: step: 124/77, loss: 0.1338333636522293 2023-01-22 07:37:10.533356: step: 128/77, loss: 0.07840230315923691 2023-01-22 07:37:11.836822: step: 132/77, loss: 0.13976845145225525 2023-01-22 07:37:13.150083: step: 136/77, loss: 0.06765273958444595 2023-01-22 07:37:14.480526: step: 140/77, loss: 0.27704665064811707 2023-01-22 07:37:15.791624: step: 144/77, loss: 0.06916029751300812 2023-01-22 07:37:17.088002: step: 148/77, loss: 0.1273961067199707 2023-01-22 07:37:18.409981: step: 152/77, loss: 0.10133853554725647 2023-01-22 07:37:19.685327: step: 156/77, loss: 0.13052284717559814 2023-01-22 07:37:20.993826: step: 160/77, loss: 0.14147736132144928 2023-01-22 07:37:22.282249: step: 164/77, loss: 0.11222036182880402 2023-01-22 07:37:23.580589: step: 168/77, loss: 0.053240492939949036 2023-01-22 07:37:24.941449: step: 172/77, loss: 0.11585910618305206 2023-01-22 07:37:26.272853: step: 176/77, loss: 0.03825229033827782 2023-01-22 07:37:27.581492: step: 180/77, loss: 0.07558736205101013 2023-01-22 07:37:28.851305: step: 184/77, loss: 0.10447107255458832 2023-01-22 07:37:30.210546: step: 188/77, loss: 0.09023972600698471 2023-01-22 07:37:31.544444: step: 192/77, loss: 0.09343035519123077 2023-01-22 07:37:32.888100: step: 196/77, loss: 0.17641930282115936 2023-01-22 07:37:34.194373: step: 200/77, loss: 0.13928231596946716 2023-01-22 07:37:35.492825: step: 204/77, loss: 0.05740395188331604 2023-01-22 07:37:36.833240: step: 208/77, loss: 0.12304575741291046 2023-01-22 07:37:38.165595: step: 212/77, loss: 0.12332822382450104 2023-01-22 07:37:39.483836: step: 216/77, loss: 0.05503353476524353 2023-01-22 07:37:40.749075: step: 220/77, loss: 0.05017006769776344 2023-01-22 07:37:42.005317: step: 224/77, loss: 0.09479832649230957 2023-01-22 07:37:43.314211: step: 228/77, loss: 0.058456674218177795 2023-01-22 07:37:44.616290: step: 232/77, loss: 0.042720895260572433 2023-01-22 07:37:45.910509: step: 236/77, loss: 0.06012682616710663 2023-01-22 07:37:47.181140: step: 240/77, loss: 0.10036720335483551 2023-01-22 07:37:48.496748: step: 244/77, loss: 0.0868229866027832 2023-01-22 07:37:49.819688: step: 248/77, loss: 0.11549285054206848 2023-01-22 07:37:51.140210: step: 252/77, loss: 0.1314641237258911 2023-01-22 07:37:52.408252: step: 256/77, loss: 0.14948371052742004 2023-01-22 07:37:53.717798: step: 260/77, loss: 0.052075546234846115 2023-01-22 07:37:55.001200: step: 264/77, loss: 0.1409543752670288 2023-01-22 07:37:56.305130: step: 268/77, loss: 0.1372973918914795 2023-01-22 07:37:57.616131: step: 272/77, loss: 0.051039919257164 2023-01-22 07:37:58.893109: step: 276/77, loss: 0.10971783846616745 2023-01-22 
07:38:00.150807: step: 280/77, loss: 0.03424395993351936 2023-01-22 07:38:01.438257: step: 284/77, loss: 0.06090284138917923 2023-01-22 07:38:02.809796: step: 288/77, loss: 0.03272155672311783 2023-01-22 07:38:04.099083: step: 292/77, loss: 0.07862883806228638 2023-01-22 07:38:05.406794: step: 296/77, loss: 0.030069496482610703 2023-01-22 07:38:06.759295: step: 300/77, loss: 0.20931200683116913 2023-01-22 07:38:08.104146: step: 304/77, loss: 0.2595861554145813 2023-01-22 07:38:09.424007: step: 308/77, loss: 0.082735575735569 2023-01-22 07:38:10.765472: step: 312/77, loss: 0.08077457547187805 2023-01-22 07:38:12.046421: step: 316/77, loss: 0.07214593142271042 2023-01-22 07:38:13.351774: step: 320/77, loss: 0.07391369342803955 2023-01-22 07:38:14.646289: step: 324/77, loss: 0.25721216201782227 2023-01-22 07:38:15.929367: step: 328/77, loss: 0.1590745449066162 2023-01-22 07:38:17.204449: step: 332/77, loss: 0.08248879760503769 2023-01-22 07:38:18.531888: step: 336/77, loss: 0.11362461745738983 2023-01-22 07:38:19.812801: step: 340/77, loss: 0.024861745536327362 2023-01-22 07:38:21.162131: step: 344/77, loss: 0.21572968363761902 2023-01-22 07:38:22.455419: step: 348/77, loss: 0.09827074408531189 2023-01-22 07:38:23.783318: step: 352/77, loss: 0.05567440763115883 2023-01-22 07:38:25.100887: step: 356/77, loss: 0.02224394679069519 2023-01-22 07:38:26.412449: step: 360/77, loss: 0.07356810569763184 2023-01-22 07:38:27.720076: step: 364/77, loss: 0.11073225736618042 2023-01-22 07:38:29.035054: step: 368/77, loss: 0.0851237028837204 2023-01-22 07:38:30.347154: step: 372/77, loss: 0.032852984964847565 2023-01-22 07:38:31.677268: step: 376/77, loss: 0.12542441487312317 2023-01-22 07:38:33.000982: step: 380/77, loss: 0.08235940337181091 2023-01-22 07:38:34.321025: step: 384/77, loss: 0.08265835046768188 2023-01-22 07:38:35.673196: step: 388/77, loss: 0.12302671372890472 ================================================== Loss: 0.100 -------------------- Dev Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Chinese: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Korean: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Dev Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Test Russian: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} Sample Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 1} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Chinese: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Chinese: {'template': {'p': 0.0, 'r': 0.0, 'f1': 
0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Korean: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Korean: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} -------------------- Dev for Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Test for Russian: {'template': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} Russian: {'template': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 0} ****************************** Epoch: 2 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:40:16.031292: step: 4/77, loss: 0.04768185317516327 2023-01-22 07:40:17.318402: step: 8/77, loss: 0.07104780524969101 2023-01-22 07:40:18.648046: step: 12/77, loss: 0.19291284680366516 2023-01-22 07:40:19.901192: step: 16/77, loss: 0.0543084442615509 2023-01-22 07:40:21.202681: step: 20/77, loss: 0.1463380753993988 2023-01-22 07:40:22.564190: step: 24/77, loss: 0.08262240886688232 2023-01-22 07:40:23.832344: step: 28/77, loss: 0.04590801149606705 2023-01-22 07:40:25.136594: step: 32/77, loss: 0.1446647346019745 2023-01-22 07:40:26.468662: step: 36/77, loss: 0.15891560912132263 2023-01-22 07:40:27.752403: step: 40/77, loss: 0.11249516904354095 2023-01-22 07:40:29.041641: step: 44/77, loss: 0.07387419044971466 2023-01-22 07:40:30.326326: step: 48/77, loss: 0.07768457382917404 2023-01-22 07:40:31.642481: step: 52/77, loss: 0.05759643018245697 2023-01-22 07:40:32.935238: step: 56/77, loss: 0.3014858663082123 2023-01-22 07:40:34.247880: step: 60/77, loss: 0.023099102079868317 2023-01-22 07:40:35.557412: step: 64/77, loss: 0.054274268448352814 2023-01-22 07:40:36.816391: step: 68/77, loss: 0.04140179604291916 2023-01-22 07:40:38.108704: step: 72/77, loss: 0.0799446851015091 2023-01-22 07:40:39.433827: step: 76/77, loss: 0.07328343391418457 2023-01-22 07:40:40.771037: step: 80/77, loss: 0.19326601922512054 2023-01-22 07:40:42.066134: step: 84/77, loss: 0.15116506814956665 2023-01-22 07:40:43.393354: step: 88/77, loss: 0.12275572866201401 2023-01-22 07:40:44.655084: step: 92/77, loss: 0.07996051758527756 2023-01-22 07:40:45.992150: step: 96/77, loss: 0.04166724160313606 2023-01-22 07:40:47.267640: step: 100/77, loss: 0.2334735244512558 2023-01-22 07:40:48.589179: step: 104/77, loss: 0.05039349943399429 2023-01-22 07:40:49.878810: step: 108/77, loss: 0.104027658700943 2023-01-22 07:40:51.201717: step: 112/77, loss: 0.11883699893951416 2023-01-22 07:40:52.521273: step: 116/77, loss: 0.16714473068714142 2023-01-22 07:40:53.824597: step: 120/77, loss: 0.16093513369560242 2023-01-22 07:40:55.106145: step: 124/77, loss: 0.08430910110473633 2023-01-22 07:40:56.386393: step: 128/77, loss: 0.09487202763557434 2023-01-22 07:40:57.674422: step: 132/77, loss: 0.11969651281833649 2023-01-22 07:40:59.015002: step: 136/77, loss: 0.12813276052474976 2023-01-22 07:41:00.299299: step: 140/77, loss: 0.10447872430086136 2023-01-22 07:41:01.637402: step: 144/77, 
loss: 0.08772681653499603 2023-01-22 07:41:02.993707: step: 148/77, loss: 0.10942377150058746 2023-01-22 07:41:04.282323: step: 152/77, loss: 0.03597911819815636 2023-01-22 07:41:05.539015: step: 156/77, loss: 0.05257668346166611 2023-01-22 07:41:06.808405: step: 160/77, loss: 0.048265159130096436 2023-01-22 07:41:08.105918: step: 164/77, loss: 0.07379506528377533 2023-01-22 07:41:09.392024: step: 168/77, loss: 0.04585869237780571 2023-01-22 07:41:10.658181: step: 172/77, loss: 0.04158536717295647 2023-01-22 07:41:11.943638: step: 176/77, loss: 0.2913612425327301 2023-01-22 07:41:13.304231: step: 180/77, loss: 0.04088529944419861 2023-01-22 07:41:14.571098: step: 184/77, loss: 0.08837796747684479 2023-01-22 07:41:15.848517: step: 188/77, loss: 0.038671888411045074 2023-01-22 07:41:17.168109: step: 192/77, loss: 0.04168698936700821 2023-01-22 07:41:18.481157: step: 196/77, loss: 0.04485369473695755 2023-01-22 07:41:19.756395: step: 200/77, loss: 0.18418560922145844 2023-01-22 07:41:21.070987: step: 204/77, loss: 0.07247032225131989 2023-01-22 07:41:22.373135: step: 208/77, loss: 0.04573575779795647 2023-01-22 07:41:23.647387: step: 212/77, loss: 0.04544593393802643 2023-01-22 07:41:24.933383: step: 216/77, loss: 0.07713460922241211 2023-01-22 07:41:26.256969: step: 220/77, loss: 0.10834340751171112 2023-01-22 07:41:27.533776: step: 224/77, loss: 0.049649372696876526 2023-01-22 07:41:28.815401: step: 228/77, loss: 0.06424231082201004 2023-01-22 07:41:30.103837: step: 232/77, loss: 0.0655415877699852 2023-01-22 07:41:31.466907: step: 236/77, loss: 0.12457633018493652 2023-01-22 07:41:32.786118: step: 240/77, loss: 0.09666450321674347 2023-01-22 07:41:34.106284: step: 244/77, loss: 0.045510344207286835 2023-01-22 07:41:35.423351: step: 248/77, loss: 0.013221305795013905 2023-01-22 07:41:36.743834: step: 252/77, loss: 0.01512373797595501 2023-01-22 07:41:38.051342: step: 256/77, loss: 0.03427667170763016 2023-01-22 07:41:39.412920: step: 260/77, loss: 0.015169290825724602 2023-01-22 07:41:40.732710: step: 264/77, loss: 0.028215918689966202 2023-01-22 07:41:42.026954: step: 268/77, loss: 0.026102934032678604 2023-01-22 07:41:43.368603: step: 272/77, loss: 0.014348288998007774 2023-01-22 07:41:44.633579: step: 276/77, loss: 0.020344989374279976 2023-01-22 07:41:45.960606: step: 280/77, loss: 0.13085316121578217 2023-01-22 07:41:47.300970: step: 284/77, loss: 0.0704241618514061 2023-01-22 07:41:48.642787: step: 288/77, loss: 0.07423460483551025 2023-01-22 07:41:49.926713: step: 292/77, loss: 0.028306419029831886 2023-01-22 07:41:51.262829: step: 296/77, loss: 0.06515046209096909 2023-01-22 07:41:52.592386: step: 300/77, loss: 0.3677542209625244 2023-01-22 07:41:53.853777: step: 304/77, loss: 0.04421716928482056 2023-01-22 07:41:55.129934: step: 308/77, loss: 0.009226376190781593 2023-01-22 07:41:56.454903: step: 312/77, loss: 0.018259337171912193 2023-01-22 07:41:57.758014: step: 316/77, loss: 0.05614681541919708 2023-01-22 07:41:59.118893: step: 320/77, loss: 0.019507668912410736 2023-01-22 07:42:00.416349: step: 324/77, loss: 0.06723038107156754 2023-01-22 07:42:01.696157: step: 328/77, loss: 0.057566963136196136 2023-01-22 07:42:03.046175: step: 332/77, loss: 0.02253660187125206 2023-01-22 07:42:04.373430: step: 336/77, loss: 0.024903327226638794 2023-01-22 07:42:05.737905: step: 340/77, loss: 0.04696238785982132 2023-01-22 07:42:07.101260: step: 344/77, loss: 0.10337799787521362 2023-01-22 07:42:08.399098: step: 348/77, loss: 0.10140752792358398 2023-01-22 07:42:09.688044: step: 352/77, loss: 
0.08285317569971085 2023-01-22 07:42:10.980878: step: 356/77, loss: 0.1097133457660675 2023-01-22 07:42:12.288188: step: 360/77, loss: 0.04434497654438019 2023-01-22 07:42:13.594433: step: 364/77, loss: 0.0060016559436917305 2023-01-22 07:42:14.880272: step: 368/77, loss: 0.010138597339391708 2023-01-22 07:42:16.253401: step: 372/77, loss: 0.024792088195681572 2023-01-22 07:42:17.525164: step: 376/77, loss: 0.19264760613441467 2023-01-22 07:42:18.818764: step: 380/77, loss: 0.03846864402294159 2023-01-22 07:42:20.078393: step: 384/77, loss: 0.01814502663910389 2023-01-22 07:42:21.416919: step: 388/77, loss: 0.01226385124027729 ================================================== Loss: 0.081 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} New best chinese model... New best korean model... New best russian model... 
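Note on the logged metrics: each per-language dict reports precision (p), recall (r), and F1 for template and slot extraction, plus a 'combined' score. Judging purely from the logged values (train.py itself is not shown in this log), each F1 is the usual harmonic mean of p and r, and 'combined' is consistent with the product of the template F1 and the slot F1. A minimal sketch, under that assumption, that reproduces the epoch-2 Dev Chinese numbers logged just above:

    # Hypothetical re-computation; the formulas are inferred from the logged
    # values, not taken from the training code.
    def f1(p, r):
        return 0.0 if p + r == 0 else 2 * p * r / (p + r)

    template_f1 = f1(1.0, 0.5833333333333334)    # ~0.7368421052631579
    slot_f1 = f1(0.5, 0.03780718336483932)       # ~0.07029876977152899
    combined = template_f1 * slot_f1             # ~0.0517990935 (matches the logged value up to float rounding)
    print(template_f1, slot_f1, combined)

Presumably the dev 'combined' score is what triggers the "New best ... model" messages above and determines the "Current best result" block that follows, though the log does not state the selection criterion explicitly.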
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 3 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:44:29.852367: step: 4/77, loss: 0.008898122236132622 2023-01-22 07:44:31.137416: step: 8/77, loss: 0.07566316425800323 2023-01-22 07:44:32.494687: step: 12/77, loss: 0.02485734410583973 2023-01-22 07:44:33.815821: step: 16/77, loss: 0.04478280991315842 2023-01-22 07:44:35.113821: step: 20/77, loss: 0.11990983784198761 2023-01-22 07:44:36.422350: step: 24/77, loss: 0.007120449561625719 2023-01-22 07:44:37.694892: step: 28/77, loss: 0.060444124042987823 2023-01-22 07:44:38.984564: step: 32/77, loss: 0.049005698412656784 2023-01-22 07:44:40.277756: step: 36/77, loss: 0.07884863018989563 2023-01-22 07:44:41.593027: step: 40/77, loss: 0.015905214473605156 2023-01-22 07:44:42.886067: step: 44/77, loss: 0.041278135031461716 2023-01-22 07:44:44.202877: step: 48/77, loss: 0.01994415372610092 2023-01-22 07:44:45.532726: step: 52/77, loss: 0.03183678537607193 2023-01-22 07:44:46.850542: step: 56/77, loss: 0.011753171682357788 2023-01-22 07:44:48.224431: step: 60/77, loss: 0.11609509587287903 2023-01-22 07:44:49.465111: step: 64/77, loss: 0.024422401562333107 2023-01-22 07:44:50.808759: step: 68/77, loss: 0.039935797452926636 2023-01-22 07:44:52.075034: step: 72/77, loss: 0.02634143829345703 2023-01-22 07:44:53.378507: step: 76/77, loss: 0.012217414565384388 2023-01-22 07:44:54.700814: step: 80/77, loss: 
0.0206155888736248 2023-01-22 07:44:56.010688: step: 84/77, loss: 0.042646557092666626 2023-01-22 07:44:57.331103: step: 88/77, loss: 0.008508237078785896 2023-01-22 07:44:58.626210: step: 92/77, loss: 0.02563760057091713 2023-01-22 07:44:59.921247: step: 96/77, loss: 0.10512962937355042 2023-01-22 07:45:01.236342: step: 100/77, loss: 0.10669828951358795 2023-01-22 07:45:02.574079: step: 104/77, loss: 0.03299158066511154 2023-01-22 07:45:03.915960: step: 108/77, loss: 0.06534292548894882 2023-01-22 07:45:05.208464: step: 112/77, loss: 0.051838312298059464 2023-01-22 07:45:06.538648: step: 116/77, loss: 0.01953636109828949 2023-01-22 07:45:07.826085: step: 120/77, loss: 0.04791083186864853 2023-01-22 07:45:09.118354: step: 124/77, loss: 0.0310364942997694 2023-01-22 07:45:10.404805: step: 128/77, loss: 0.019350484013557434 2023-01-22 07:45:11.774775: step: 132/77, loss: 0.13583087921142578 2023-01-22 07:45:13.033946: step: 136/77, loss: 0.002822377486154437 2023-01-22 07:45:14.393316: step: 140/77, loss: 0.030841834843158722 2023-01-22 07:45:15.701828: step: 144/77, loss: 0.035415925085544586 2023-01-22 07:45:16.984423: step: 148/77, loss: 0.1077069491147995 2023-01-22 07:45:18.279204: step: 152/77, loss: 0.016785571351647377 2023-01-22 07:45:19.574194: step: 156/77, loss: 0.026634112000465393 2023-01-22 07:45:20.931447: step: 160/77, loss: 0.030998708680272102 2023-01-22 07:45:22.245736: step: 164/77, loss: 0.037455003708601 2023-01-22 07:45:23.552745: step: 168/77, loss: 0.03089234046638012 2023-01-22 07:45:24.815659: step: 172/77, loss: 0.010667935013771057 2023-01-22 07:45:26.090073: step: 176/77, loss: 0.0074789999052882195 2023-01-22 07:45:27.338220: step: 180/77, loss: 0.020553266629576683 2023-01-22 07:45:28.630684: step: 184/77, loss: 0.036195676773786545 2023-01-22 07:45:29.910404: step: 188/77, loss: 0.053434185683727264 2023-01-22 07:45:31.239311: step: 192/77, loss: 0.04257701337337494 2023-01-22 07:45:32.578435: step: 196/77, loss: 0.0338403582572937 2023-01-22 07:45:33.923775: step: 200/77, loss: 0.011131498962640762 2023-01-22 07:45:35.283000: step: 204/77, loss: 0.29350656270980835 2023-01-22 07:45:36.569864: step: 208/77, loss: 0.014081919565796852 2023-01-22 07:45:37.883242: step: 212/77, loss: 0.021087724715471268 2023-01-22 07:45:39.176775: step: 216/77, loss: 0.034299690276384354 2023-01-22 07:45:40.465868: step: 220/77, loss: 0.09156246483325958 2023-01-22 07:45:41.774170: step: 224/77, loss: 0.04639114439487457 2023-01-22 07:45:43.089333: step: 228/77, loss: 0.046765901148319244 2023-01-22 07:45:44.438788: step: 232/77, loss: 0.009462382644414902 2023-01-22 07:45:45.777686: step: 236/77, loss: 0.026388362050056458 2023-01-22 07:45:47.080854: step: 240/77, loss: 0.005971093196421862 2023-01-22 07:45:48.376193: step: 244/77, loss: 0.006199051160365343 2023-01-22 07:45:49.685351: step: 248/77, loss: 0.02215702459216118 2023-01-22 07:45:50.985325: step: 252/77, loss: 0.11260189116001129 2023-01-22 07:45:52.267048: step: 256/77, loss: 0.0063177552074193954 2023-01-22 07:45:53.584123: step: 260/77, loss: 0.01616254635155201 2023-01-22 07:45:54.940723: step: 264/77, loss: 0.01467475201934576 2023-01-22 07:45:56.261636: step: 268/77, loss: 0.06858555227518082 2023-01-22 07:45:57.565247: step: 272/77, loss: 0.057683344930410385 2023-01-22 07:45:58.891697: step: 276/77, loss: 0.033667661249637604 2023-01-22 07:46:00.197982: step: 280/77, loss: 0.002162193413823843 2023-01-22 07:46:01.538490: step: 284/77, loss: 0.11145009100437164 2023-01-22 07:46:02.822219: step: 288/77, 
loss: 0.07552994787693024 2023-01-22 07:46:04.133105: step: 292/77, loss: 0.045220375061035156 2023-01-22 07:46:05.462132: step: 296/77, loss: 0.02754788286983967 2023-01-22 07:46:06.772363: step: 300/77, loss: 0.026420462876558304 2023-01-22 07:46:08.061677: step: 304/77, loss: 0.015606552362442017 2023-01-22 07:46:09.413038: step: 308/77, loss: 0.019723594188690186 2023-01-22 07:46:10.686821: step: 312/77, loss: 0.03442617505788803 2023-01-22 07:46:11.997795: step: 316/77, loss: 0.07348484545946121 2023-01-22 07:46:13.307869: step: 320/77, loss: 0.020555390045046806 2023-01-22 07:46:14.605501: step: 324/77, loss: 0.018619626760482788 2023-01-22 07:46:15.934673: step: 328/77, loss: 0.006300671026110649 2023-01-22 07:46:17.225930: step: 332/77, loss: 0.012363612651824951 2023-01-22 07:46:18.523950: step: 336/77, loss: 0.03483050316572189 2023-01-22 07:46:19.844225: step: 340/77, loss: 0.012440014630556107 2023-01-22 07:46:21.175291: step: 344/77, loss: 0.0694480910897255 2023-01-22 07:46:22.514063: step: 348/77, loss: 0.03845695033669472 2023-01-22 07:46:23.800658: step: 352/77, loss: 0.00946133490651846 2023-01-22 07:46:25.115427: step: 356/77, loss: 0.0837019756436348 2023-01-22 07:46:26.408300: step: 360/77, loss: 0.09910577535629272 2023-01-22 07:46:27.742966: step: 364/77, loss: 0.015134901739656925 2023-01-22 07:46:29.111602: step: 368/77, loss: 0.06312797218561172 2023-01-22 07:46:30.450450: step: 372/77, loss: 0.01796662248671055 2023-01-22 07:46:31.719734: step: 376/77, loss: 0.013086151331663132 2023-01-22 07:46:32.985932: step: 380/77, loss: 0.03351001441478729 2023-01-22 07:46:34.343586: step: 384/77, loss: 0.0543978177011013 2023-01-22 07:46:35.660145: step: 388/77, loss: 0.014590279199182987 ================================================== Loss: 0.042 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 3} Test Chinese: {'template': {'p': 0.9436619718309859, 'r': 0.5317460317460317, 'f1': 0.6802030456852791}, 'slot': {'p': 0.7419354838709677, 'r': 0.019759450171821305, 'f1': 0.038493723849372385}, 'combined': 0.026183548202111162, 'epoch': 3} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 3} Test Korean: {'template': {'p': 0.9444444444444444, 'r': 0.5396825396825397, 'f1': 0.6868686868686867}, 'slot': {'p': 0.7272727272727273, 'r': 0.020618556701030927, 'f1': 0.040100250626566414}, 'combined': 0.027543606490974905, 'epoch': 3} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 3} Test Russian: {'template': {'p': 0.9436619718309859, 'r': 0.5317460317460317, 'f1': 0.6802030456852791}, 'slot': {'p': 0.7419354838709677, 'r': 0.019759450171821305, 'f1': 0.038493723849372385}, 'combined': 0.026183548202111162, 'epoch': 3} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 3} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 3} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 
0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 3} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 4 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:48:16.252041: step: 4/77, loss: 0.023417659103870392 2023-01-22 07:48:17.540048: step: 8/77, loss: 0.0196953397244215 2023-01-22 07:48:18.810163: step: 12/77, loss: 0.010604934766888618 2023-01-22 07:48:20.111788: step: 16/77, loss: 0.026274144649505615 2023-01-22 07:48:21.408809: step: 20/77, loss: 0.0014932897174730897 2023-01-22 07:48:22.746340: step: 24/77, loss: 0.025961250066757202 2023-01-22 07:48:24.065256: step: 28/77, loss: 0.042515743523836136 2023-01-22 07:48:25.328381: step: 32/77, loss: 0.010017447173595428 2023-01-22 07:48:26.621440: step: 36/77, loss: 0.022811995819211006 2023-01-22 07:48:27.928346: step: 40/77, loss: 0.03783687949180603 2023-01-22 07:48:29.246523: step: 44/77, loss: 0.025669317692518234 2023-01-22 07:48:30.537307: step: 48/77, loss: 0.012726683169603348 2023-01-22 07:48:31.813202: step: 52/77, loss: 0.09254588186740875 2023-01-22 07:48:33.065066: step: 56/77, loss: 0.03728096932172775 2023-01-22 07:48:34.351022: step: 60/77, loss: 0.02287282608449459 2023-01-22 07:48:35.621698: step: 64/77, loss: 0.021858546882867813 2023-01-22 07:48:36.871564: step: 68/77, loss: 0.010480173863470554 2023-01-22 07:48:38.192445: step: 72/77, loss: 
0.11430490016937256 2023-01-22 07:48:39.471892: step: 76/77, loss: 0.0045967064797878265 2023-01-22 07:48:40.774142: step: 80/77, loss: 0.04011622816324234 2023-01-22 07:48:42.066746: step: 84/77, loss: 0.06810526549816132 2023-01-22 07:48:43.348437: step: 88/77, loss: 0.0022294847294688225 2023-01-22 07:48:44.663922: step: 92/77, loss: 0.015196477994322777 2023-01-22 07:48:45.931410: step: 96/77, loss: 0.02204442210495472 2023-01-22 07:48:47.245800: step: 100/77, loss: 0.051844045519828796 2023-01-22 07:48:48.563850: step: 104/77, loss: 0.005850006360560656 2023-01-22 07:48:49.905333: step: 108/77, loss: 0.053736791014671326 2023-01-22 07:48:51.186794: step: 112/77, loss: 0.04627533629536629 2023-01-22 07:48:52.483820: step: 116/77, loss: 0.00092123361537233 2023-01-22 07:48:53.786202: step: 120/77, loss: 0.039544571191072464 2023-01-22 07:48:55.131495: step: 124/77, loss: 0.04642752930521965 2023-01-22 07:48:56.435025: step: 128/77, loss: 0.11437688767910004 2023-01-22 07:48:57.716453: step: 132/77, loss: 0.007598129101097584 2023-01-22 07:48:59.053280: step: 136/77, loss: 0.009800883010029793 2023-01-22 07:49:00.326735: step: 140/77, loss: 0.010883791372179985 2023-01-22 07:49:01.644805: step: 144/77, loss: 0.1618647277355194 2023-01-22 07:49:02.975909: step: 148/77, loss: 0.03676885738968849 2023-01-22 07:49:04.331924: step: 152/77, loss: 0.011590557172894478 2023-01-22 07:49:05.630857: step: 156/77, loss: 0.02291189506649971 2023-01-22 07:49:07.005045: step: 160/77, loss: 0.003542313352227211 2023-01-22 07:49:08.272276: step: 164/77, loss: 0.0626843124628067 2023-01-22 07:49:09.593618: step: 168/77, loss: 0.0027569583617150784 2023-01-22 07:49:10.922905: step: 172/77, loss: 0.017458012327551842 2023-01-22 07:49:12.254388: step: 176/77, loss: 0.030079776421189308 2023-01-22 07:49:13.524717: step: 180/77, loss: 0.008054882287979126 2023-01-22 07:49:14.788605: step: 184/77, loss: 0.0851554423570633 2023-01-22 07:49:16.121382: step: 188/77, loss: 0.04232963174581528 2023-01-22 07:49:17.409921: step: 192/77, loss: 0.029516037553548813 2023-01-22 07:49:18.741680: step: 196/77, loss: 0.018004145473241806 2023-01-22 07:49:20.071710: step: 200/77, loss: 0.02017074078321457 2023-01-22 07:49:21.385819: step: 204/77, loss: 0.017187729477882385 2023-01-22 07:49:22.714717: step: 208/77, loss: 0.021137960255146027 2023-01-22 07:49:24.016061: step: 212/77, loss: 0.010061761364340782 2023-01-22 07:49:25.342333: step: 216/77, loss: 0.03293940797448158 2023-01-22 07:49:26.651510: step: 220/77, loss: 0.028204970061779022 2023-01-22 07:49:27.979379: step: 224/77, loss: 0.03787591680884361 2023-01-22 07:49:29.285135: step: 228/77, loss: 0.007750194985419512 2023-01-22 07:49:30.578662: step: 232/77, loss: 0.11326970160007477 2023-01-22 07:49:31.876421: step: 236/77, loss: 0.0397779680788517 2023-01-22 07:49:33.227802: step: 240/77, loss: 0.009334595873951912 2023-01-22 07:49:34.527973: step: 244/77, loss: 0.007776356302201748 2023-01-22 07:49:35.825317: step: 248/77, loss: 0.017981214448809624 2023-01-22 07:49:37.097951: step: 252/77, loss: 0.042225658893585205 2023-01-22 07:49:38.447473: step: 256/77, loss: 0.021111395210027695 2023-01-22 07:49:39.751490: step: 260/77, loss: 0.09040164202451706 2023-01-22 07:49:41.097888: step: 264/77, loss: 0.13206541538238525 2023-01-22 07:49:42.417646: step: 268/77, loss: 0.004779032897204161 2023-01-22 07:49:43.703056: step: 272/77, loss: 0.088816799223423 2023-01-22 07:49:44.996465: step: 276/77, loss: 0.02132064662873745 2023-01-22 07:49:46.302901: step: 280/77, 
loss: 0.06475003808736801 2023-01-22 07:49:47.586776: step: 284/77, loss: 0.0050977421924471855 2023-01-22 07:49:48.914596: step: 288/77, loss: 0.014675735495984554 2023-01-22 07:49:50.197233: step: 292/77, loss: 0.06635600328445435 2023-01-22 07:49:51.487020: step: 296/77, loss: 0.03008580021560192 2023-01-22 07:49:52.782527: step: 300/77, loss: 0.006084037013351917 2023-01-22 07:49:54.081690: step: 304/77, loss: 0.05068189650774002 2023-01-22 07:49:55.401862: step: 308/77, loss: 0.07237912714481354 2023-01-22 07:49:56.696563: step: 312/77, loss: 0.02533440850675106 2023-01-22 07:49:57.963824: step: 316/77, loss: 0.05642259865999222 2023-01-22 07:49:59.264783: step: 320/77, loss: 0.07874306291341782 2023-01-22 07:50:00.603417: step: 324/77, loss: 0.0407525859773159 2023-01-22 07:50:01.939802: step: 328/77, loss: 0.017745740711688995 2023-01-22 07:50:03.268910: step: 332/77, loss: 0.012438332661986351 2023-01-22 07:50:04.588497: step: 336/77, loss: 0.02190583571791649 2023-01-22 07:50:05.878394: step: 340/77, loss: 0.014679135754704475 2023-01-22 07:50:07.174005: step: 344/77, loss: 0.007208996452391148 2023-01-22 07:50:08.458495: step: 348/77, loss: 0.005275039467960596 2023-01-22 07:50:09.746432: step: 352/77, loss: 0.01978233829140663 2023-01-22 07:50:11.086594: step: 356/77, loss: 0.03178076446056366 2023-01-22 07:50:12.349573: step: 360/77, loss: 0.04151931032538414 2023-01-22 07:50:13.648676: step: 364/77, loss: 0.004386279731988907 2023-01-22 07:50:15.002305: step: 368/77, loss: 0.046472564339637756 2023-01-22 07:50:16.316922: step: 372/77, loss: 0.06333746761083603 2023-01-22 07:50:17.619569: step: 376/77, loss: 0.026951458305120468 2023-01-22 07:50:18.955289: step: 380/77, loss: 0.017069118097424507 2023-01-22 07:50:20.283040: step: 384/77, loss: 0.025377962738275528 2023-01-22 07:50:21.590088: step: 388/77, loss: 0.005843974184244871 ================================================== Loss: 0.034 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Chinese: {'template': {'p': 0.9180327868852459, 'r': 0.4444444444444444, 'f1': 0.5989304812834225}, 'slot': {'p': 0.5517241379310345, 'r': 0.013745704467353952, 'f1': 0.02682313495389774}, 'combined': 0.016065193127468166, 'epoch': 4} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Korean: {'template': {'p': 0.9166666666666666, 'r': 0.4365079365079365, 'f1': 0.5913978494623655}, 'slot': {'p': 0.5517241379310345, 'r': 0.013745704467353952, 'f1': 0.02682313495389774}, 'combined': 0.01586314432757393, 'epoch': 4} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 4} Test Russian: {'template': {'p': 0.9180327868852459, 'r': 0.4444444444444444, 'f1': 0.5989304812834225}, 'slot': {'p': 0.5517241379310345, 'r': 0.013745704467353952, 'f1': 0.02682313495389774}, 'combined': 0.016065193127468166, 'epoch': 4} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 4} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 
'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 4} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 4} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 5 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:52:01.779232: step: 4/77, loss: 0.009712163358926773 2023-01-22 07:52:03.067768: step: 8/77, loss: 0.07672275602817535 2023-01-22 07:52:04.405155: step: 12/77, loss: 0.00230649346485734 2023-01-22 07:52:05.689941: step: 16/77, loss: 0.019387666136026382 2023-01-22 07:52:07.007583: step: 20/77, loss: 0.015886085107922554 2023-01-22 07:52:08.319663: step: 24/77, loss: 0.05474954470992088 2023-01-22 07:52:09.637815: step: 28/77, loss: 0.04290302097797394 2023-01-22 07:52:10.938645: step: 32/77, loss: 0.06911797821521759 2023-01-22 07:52:12.224648: step: 36/77, loss: 0.008430874906480312 2023-01-22 07:52:13.507405: step: 40/77, loss: 0.011744523420929909 2023-01-22 07:52:14.816255: step: 44/77, loss: 0.06511920690536499 2023-01-22 07:52:16.150522: step: 48/77, loss: 0.03562755882740021 2023-01-22 07:52:17.484589: step: 52/77, loss: 0.020151851698756218 2023-01-22 07:52:18.759756: step: 56/77, loss: 0.0567491240799427 2023-01-22 07:52:20.018528: step: 60/77, loss: 0.08791999518871307 2023-01-22 07:52:21.282991: step: 64/77, loss: 0.03458832576870918 
2023-01-22 07:52:22.605834: step: 68/77, loss: 0.0075601390562951565 2023-01-22 07:52:23.904570: step: 72/77, loss: 0.006983015686273575 2023-01-22 07:52:25.190859: step: 76/77, loss: 0.008428785018622875 2023-01-22 07:52:26.463389: step: 80/77, loss: 0.017788860946893692 2023-01-22 07:52:27.727702: step: 84/77, loss: 0.028414368629455566 2023-01-22 07:52:29.081230: step: 88/77, loss: 0.04921044781804085 2023-01-22 07:52:30.410651: step: 92/77, loss: 0.03103695623576641 2023-01-22 07:52:31.741940: step: 96/77, loss: 0.037225544452667236 2023-01-22 07:52:33.008708: step: 100/77, loss: 0.01973811909556389 2023-01-22 07:52:34.297022: step: 104/77, loss: 0.020425807684659958 2023-01-22 07:52:35.588057: step: 108/77, loss: 0.029876621440052986 2023-01-22 07:52:36.944659: step: 112/77, loss: 0.05362161621451378 2023-01-22 07:52:38.216412: step: 116/77, loss: 0.05245602875947952 2023-01-22 07:52:39.505444: step: 120/77, loss: 0.015833435580134392 2023-01-22 07:52:40.763973: step: 124/77, loss: 0.012610914185643196 2023-01-22 07:52:42.040998: step: 128/77, loss: 0.003384954761713743 2023-01-22 07:52:43.380758: step: 132/77, loss: 0.026845553889870644 2023-01-22 07:52:44.698017: step: 136/77, loss: 0.1506502628326416 2023-01-22 07:52:45.992200: step: 140/77, loss: 0.025692788884043694 2023-01-22 07:52:47.338931: step: 144/77, loss: 0.025525707751512527 2023-01-22 07:52:48.603028: step: 148/77, loss: 0.009272797964513302 2023-01-22 07:52:49.897983: step: 152/77, loss: 0.011932496912777424 2023-01-22 07:52:51.227170: step: 156/77, loss: 0.010740198194980621 2023-01-22 07:52:52.481425: step: 160/77, loss: 0.012506979517638683 2023-01-22 07:52:53.799471: step: 164/77, loss: 0.03089609183371067 2023-01-22 07:52:55.127263: step: 168/77, loss: 0.03214671462774277 2023-01-22 07:52:56.468173: step: 172/77, loss: 0.01356554962694645 2023-01-22 07:52:57.727825: step: 176/77, loss: 0.005248316563665867 2023-01-22 07:52:59.061994: step: 180/77, loss: 0.017001446336507797 2023-01-22 07:53:00.409341: step: 184/77, loss: 0.016217608004808426 2023-01-22 07:53:01.749689: step: 188/77, loss: 0.0104384645819664 2023-01-22 07:53:03.024021: step: 192/77, loss: 0.002319543156772852 2023-01-22 07:53:04.333923: step: 196/77, loss: 0.02026873268187046 2023-01-22 07:53:05.629603: step: 200/77, loss: 0.014656349085271358 2023-01-22 07:53:06.922867: step: 204/77, loss: 0.026904229074716568 2023-01-22 07:53:08.235024: step: 208/77, loss: 0.02369135618209839 2023-01-22 07:53:09.508631: step: 212/77, loss: 0.030837981030344963 2023-01-22 07:53:10.795811: step: 216/77, loss: 0.021040054038167 2023-01-22 07:53:12.157314: step: 220/77, loss: 0.04531940072774887 2023-01-22 07:53:13.498049: step: 224/77, loss: 0.030913038179278374 2023-01-22 07:53:14.827607: step: 228/77, loss: 0.0013430267572402954 2023-01-22 07:53:16.104692: step: 232/77, loss: 0.006149583961814642 2023-01-22 07:53:17.426491: step: 236/77, loss: 0.12254571914672852 2023-01-22 07:53:18.750630: step: 240/77, loss: 0.1943351775407791 2023-01-22 07:53:20.026986: step: 244/77, loss: 0.023108499124646187 2023-01-22 07:53:21.282360: step: 248/77, loss: 0.007678337395191193 2023-01-22 07:53:22.538609: step: 252/77, loss: 0.00511655118316412 2023-01-22 07:53:23.853758: step: 256/77, loss: 0.08207176625728607 2023-01-22 07:53:25.168018: step: 260/77, loss: 0.1483311504125595 2023-01-22 07:53:26.449798: step: 264/77, loss: 0.005785231478512287 2023-01-22 07:53:27.780223: step: 268/77, loss: 0.014274337328970432 2023-01-22 07:53:29.084914: step: 272/77, loss: 
0.057694658637046814 2023-01-22 07:53:30.422302: step: 276/77, loss: 0.008485383354127407 2023-01-22 07:53:31.733295: step: 280/77, loss: 0.004589818883687258 2023-01-22 07:53:33.029932: step: 284/77, loss: 0.04261079430580139 2023-01-22 07:53:34.315202: step: 288/77, loss: 0.030632158741354942 2023-01-22 07:53:35.590614: step: 292/77, loss: 0.003071536310017109 2023-01-22 07:53:36.929642: step: 296/77, loss: 0.06164884567260742 2023-01-22 07:53:38.224586: step: 300/77, loss: 0.07565826177597046 2023-01-22 07:53:39.543598: step: 304/77, loss: 0.02637699618935585 2023-01-22 07:53:40.868483: step: 308/77, loss: 0.053700368851423264 2023-01-22 07:53:42.141898: step: 312/77, loss: 0.032959625124931335 2023-01-22 07:53:43.409715: step: 316/77, loss: 0.05380704253911972 2023-01-22 07:53:44.822911: step: 320/77, loss: 0.01774766482412815 2023-01-22 07:53:46.123180: step: 324/77, loss: 0.04747108370065689 2023-01-22 07:53:47.392041: step: 328/77, loss: 0.02787405252456665 2023-01-22 07:53:48.740222: step: 332/77, loss: 0.0015647481195628643 2023-01-22 07:53:50.079696: step: 336/77, loss: 0.02628343552350998 2023-01-22 07:53:51.395354: step: 340/77, loss: 0.0313742458820343 2023-01-22 07:53:52.705492: step: 344/77, loss: 0.00651584193110466 2023-01-22 07:53:54.051624: step: 348/77, loss: 0.006083859130740166 2023-01-22 07:53:55.370934: step: 352/77, loss: 0.028530307114124298 2023-01-22 07:53:56.678131: step: 356/77, loss: 0.017432240769267082 2023-01-22 07:53:57.980180: step: 360/77, loss: 0.028564533218741417 2023-01-22 07:53:59.343594: step: 364/77, loss: 0.008658488281071186 2023-01-22 07:54:00.657831: step: 368/77, loss: 0.0064378841780126095 2023-01-22 07:54:01.959776: step: 372/77, loss: 0.08271316438913345 2023-01-22 07:54:03.318549: step: 376/77, loss: 0.037459179759025574 2023-01-22 07:54:04.697004: step: 380/77, loss: 0.002762680407613516 2023-01-22 07:54:06.023397: step: 384/77, loss: 0.038842104375362396 2023-01-22 07:54:07.379100: step: 388/77, loss: 0.02973165735602379 ================================================== Loss: 0.032 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 5} Test Chinese: {'template': {'p': 0.971830985915493, 'r': 0.5476190476190477, 'f1': 0.7005076142131981}, 'slot': {'p': 0.7027027027027027, 'r': 0.022336769759450172, 'f1': 0.04329725228975853}, 'combined': 0.030330054903485677, 'epoch': 5} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 5} Test Korean: {'template': {'p': 0.9714285714285714, 'r': 0.5396825396825397, 'f1': 0.6938775510204082}, 'slot': {'p': 0.6944444444444444, 'r': 0.02147766323024055, 'f1': 0.041666666666666664}, 'combined': 0.028911564625850338, 'epoch': 5} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 5} Test Russian: {'template': {'p': 0.9710144927536232, 'r': 0.5317460317460317, 'f1': 0.6871794871794872}, 'slot': {'p': 0.7058823529411765, 'r': 0.020618556701030927, 'f1': 0.04006677796327212}, 'combined': 0.02753306793373571, 'epoch': 5} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 
0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 5} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 5} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 5} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 6 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:55:48.198381: step: 4/77, loss: 0.03020402044057846 2023-01-22 07:55:49.496978: step: 8/77, loss: 0.06450265645980835 2023-01-22 07:55:50.813341: step: 12/77, loss: 0.022785644978284836 2023-01-22 07:55:52.145103: step: 16/77, loss: 0.035041648894548416 2023-01-22 07:55:53.446776: step: 20/77, loss: 0.00880738440901041 2023-01-22 07:55:54.699304: step: 24/77, loss: 0.0014209969667717814 2023-01-22 07:55:55.937645: step: 28/77, loss: 0.014696823433041573 2023-01-22 07:55:57.229067: step: 32/77, loss: 0.06408432126045227 2023-01-22 07:55:58.575694: step: 36/77, loss: 0.023726556450128555 2023-01-22 07:55:59.863787: step: 40/77, loss: 0.01294927392154932 2023-01-22 07:56:01.170991: step: 44/77, loss: 0.02006208896636963 2023-01-22 07:56:02.474730: step: 48/77, loss: 0.007329146843403578 2023-01-22 07:56:03.805337: step: 52/77, loss: 0.01106266863644123 2023-01-22 07:56:05.112045: step: 56/77, 
loss: 0.0002934904769062996 2023-01-22 07:56:06.394981: step: 60/77, loss: 0.0036544944159686565 2023-01-22 07:56:07.745742: step: 64/77, loss: 0.011319036595523357 2023-01-22 07:56:09.047025: step: 68/77, loss: 0.05203656852245331 2023-01-22 07:56:10.266844: step: 72/77, loss: 0.04454466328024864 2023-01-22 07:56:11.589334: step: 76/77, loss: 0.05974990129470825 2023-01-22 07:56:12.896823: step: 80/77, loss: 0.05492483824491501 2023-01-22 07:56:14.187000: step: 84/77, loss: 0.0255147572606802 2023-01-22 07:56:15.504605: step: 88/77, loss: 0.05782134830951691 2023-01-22 07:56:16.820410: step: 92/77, loss: 0.03452653810381889 2023-01-22 07:56:18.083814: step: 96/77, loss: 0.0071827988140285015 2023-01-22 07:56:19.420407: step: 100/77, loss: 0.039882779121398926 2023-01-22 07:56:20.731903: step: 104/77, loss: 0.028421707451343536 2023-01-22 07:56:22.032849: step: 108/77, loss: 0.010081905871629715 2023-01-22 07:56:23.365779: step: 112/77, loss: 0.00359415914863348 2023-01-22 07:56:24.638826: step: 116/77, loss: 0.005187658593058586 2023-01-22 07:56:25.929697: step: 120/77, loss: 0.04282506927847862 2023-01-22 07:56:27.253386: step: 124/77, loss: 0.044339973479509354 2023-01-22 07:56:28.549769: step: 128/77, loss: 0.0011450829915702343 2023-01-22 07:56:29.879623: step: 132/77, loss: 0.016200868412852287 2023-01-22 07:56:31.131695: step: 136/77, loss: 0.040591076016426086 2023-01-22 07:56:32.416199: step: 140/77, loss: 0.0006642768858000636 2023-01-22 07:56:33.700849: step: 144/77, loss: 0.028512677177786827 2023-01-22 07:56:35.028704: step: 148/77, loss: 0.04284897446632385 2023-01-22 07:56:36.312333: step: 152/77, loss: 0.006877818610519171 2023-01-22 07:56:37.600844: step: 156/77, loss: 0.015897460281848907 2023-01-22 07:56:38.908720: step: 160/77, loss: 0.01649112068116665 2023-01-22 07:56:40.223982: step: 164/77, loss: 0.00428872462362051 2023-01-22 07:56:41.507849: step: 168/77, loss: 0.00872961524873972 2023-01-22 07:56:42.884690: step: 172/77, loss: 0.42743775248527527 2023-01-22 07:56:44.199864: step: 176/77, loss: 0.02830498293042183 2023-01-22 07:56:45.509112: step: 180/77, loss: 0.014903073199093342 2023-01-22 07:56:46.859822: step: 184/77, loss: 0.00024340944946743548 2023-01-22 07:56:48.182781: step: 188/77, loss: 0.010939395055174828 2023-01-22 07:56:49.483150: step: 192/77, loss: 0.08201880753040314 2023-01-22 07:56:50.779626: step: 196/77, loss: 0.016874713823199272 2023-01-22 07:56:52.037214: step: 200/77, loss: 0.014531994238495827 2023-01-22 07:56:53.324430: step: 204/77, loss: 0.0049271308816969395 2023-01-22 07:56:54.642283: step: 208/77, loss: 0.04061633720993996 2023-01-22 07:56:55.920314: step: 212/77, loss: 0.002478919690474868 2023-01-22 07:56:57.246462: step: 216/77, loss: 0.04487251490354538 2023-01-22 07:56:58.573482: step: 220/77, loss: 0.022969551384449005 2023-01-22 07:56:59.916346: step: 224/77, loss: 0.01762833259999752 2023-01-22 07:57:01.219341: step: 228/77, loss: 0.023906050249934196 2023-01-22 07:57:02.481623: step: 232/77, loss: 0.006751799024641514 2023-01-22 07:57:03.770450: step: 236/77, loss: 0.02012111060321331 2023-01-22 07:57:05.079740: step: 240/77, loss: 0.0218367762863636 2023-01-22 07:57:06.384565: step: 244/77, loss: 0.002203400479629636 2023-01-22 07:57:07.648488: step: 248/77, loss: 0.03326624631881714 2023-01-22 07:57:08.930771: step: 252/77, loss: 0.018131040036678314 2023-01-22 07:57:10.251906: step: 256/77, loss: 0.017648430541157722 2023-01-22 07:57:11.549041: step: 260/77, loss: 0.049811914563179016 2023-01-22 07:57:12.833591: step: 
264/77, loss: 0.013499320484697819 2023-01-22 07:57:14.143525: step: 268/77, loss: 0.00505072483792901 2023-01-22 07:57:15.439916: step: 272/77, loss: 0.0012601492926478386 2023-01-22 07:57:16.717171: step: 276/77, loss: 0.011764097958803177 2023-01-22 07:57:18.004219: step: 280/77, loss: 0.008309504017233849 2023-01-22 07:57:19.321979: step: 284/77, loss: 0.0017276185099035501 2023-01-22 07:57:20.614718: step: 288/77, loss: 0.004906029440462589 2023-01-22 07:57:21.924148: step: 292/77, loss: 0.012138359248638153 2023-01-22 07:57:23.240118: step: 296/77, loss: 0.03456299006938934 2023-01-22 07:57:24.575365: step: 300/77, loss: 0.0358954556286335 2023-01-22 07:57:25.878487: step: 304/77, loss: 0.0026492213364690542 2023-01-22 07:57:27.176536: step: 308/77, loss: 0.010927144438028336 2023-01-22 07:57:28.519537: step: 312/77, loss: 0.17556488513946533 2023-01-22 07:57:29.789735: step: 316/77, loss: 0.02160274237394333 2023-01-22 07:57:31.143979: step: 320/77, loss: 0.0010978600475937128 2023-01-22 07:57:32.402202: step: 324/77, loss: 0.03171160817146301 2023-01-22 07:57:33.675024: step: 328/77, loss: 0.017102569341659546 2023-01-22 07:57:34.936840: step: 332/77, loss: 0.00043492187978699803 2023-01-22 07:57:36.242153: step: 336/77, loss: 0.014497132040560246 2023-01-22 07:57:37.563870: step: 340/77, loss: 0.03126572445034981 2023-01-22 07:57:38.871383: step: 344/77, loss: 0.08123748749494553 2023-01-22 07:57:40.185349: step: 348/77, loss: 0.014668785035610199 2023-01-22 07:57:41.526407: step: 352/77, loss: 0.08094578236341476 2023-01-22 07:57:42.788541: step: 356/77, loss: 0.00300383847206831 2023-01-22 07:57:44.063291: step: 360/77, loss: 0.0017249882221221924 2023-01-22 07:57:45.345662: step: 364/77, loss: 0.00893208384513855 2023-01-22 07:57:46.628008: step: 368/77, loss: 0.008719152770936489 2023-01-22 07:57:47.941917: step: 372/77, loss: 0.05079510062932968 2023-01-22 07:57:49.294220: step: 376/77, loss: 0.010510473512113094 2023-01-22 07:57:50.559848: step: 380/77, loss: 0.01431712880730629 2023-01-22 07:57:51.866945: step: 384/77, loss: 0.005489309784024954 2023-01-22 07:57:53.198235: step: 388/77, loss: 0.0007201767875812948 ================================================== Loss: 0.027 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 6} Test Chinese: {'template': {'p': 0.9558823529411765, 'r': 0.5158730158730159, 'f1': 0.6701030927835052}, 'slot': {'p': 0.5555555555555556, 'r': 0.01288659793814433, 'f1': 0.02518891687657431}, 'combined': 0.016879171102859074, 'epoch': 6} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 6} Test Korean: {'template': {'p': 0.9558823529411765, 'r': 0.5158730158730159, 'f1': 0.6701030927835052}, 'slot': {'p': 0.5555555555555556, 'r': 0.01288659793814433, 'f1': 0.02518891687657431}, 'combined': 0.016879171102859074, 'epoch': 6} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 6} Test Russian: {'template': {'p': 0.9558823529411765, 'r': 0.5158730158730159, 'f1': 0.6701030927835052}, 'slot': {'p': 0.5555555555555556, 'r': 0.01288659793814433, 'f1': 0.02518891687657431}, 'combined': 
0.016879171102859074, 'epoch': 6} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 6} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 6} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 6} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 7 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 07:59:33.652981: step: 4/77, loss: 0.039592523127794266 2023-01-22 07:59:34.989120: step: 8/77, loss: 0.03647180274128914 2023-01-22 07:59:36.247982: step: 12/77, loss: 0.05950095131993294 2023-01-22 07:59:37.528977: step: 16/77, loss: 0.020810147747397423 2023-01-22 07:59:38.793527: step: 20/77, loss: 0.049334120005369186 2023-01-22 07:59:40.098725: step: 24/77, loss: 0.011051503010094166 2023-01-22 07:59:41.421992: step: 28/77, loss: 0.13336032629013062 2023-01-22 07:59:42.756691: step: 32/77, loss: 0.03745570033788681 2023-01-22 07:59:44.078004: step: 36/77, loss: 0.009318767115473747 2023-01-22 07:59:45.380055: step: 40/77, loss: 0.020486967638134956 2023-01-22 07:59:46.657947: step: 44/77, loss: 0.04494426026940346 2023-01-22 07:59:47.995602: step: 48/77, 
loss: 0.006008772645145655 2023-01-22 07:59:49.286884: step: 52/77, loss: 0.003612846601754427 2023-01-22 07:59:50.667306: step: 56/77, loss: 0.01742379181087017 2023-01-22 07:59:51.994028: step: 60/77, loss: 0.006468200124800205 2023-01-22 07:59:53.293997: step: 64/77, loss: 0.004890457261353731 2023-01-22 07:59:54.573838: step: 68/77, loss: 0.0017555034719407558 2023-01-22 07:59:55.834329: step: 72/77, loss: 0.10227682441473007 2023-01-22 07:59:57.095557: step: 76/77, loss: 0.01095428504049778 2023-01-22 07:59:58.372810: step: 80/77, loss: 0.03547348454594612 2023-01-22 07:59:59.645272: step: 84/77, loss: 0.014274870045483112 2023-01-22 08:00:00.997233: step: 88/77, loss: 0.019179657101631165 2023-01-22 08:00:02.370384: step: 92/77, loss: 0.001970258541405201 2023-01-22 08:00:03.648446: step: 96/77, loss: 0.004264642484486103 2023-01-22 08:00:04.997602: step: 100/77, loss: 0.007505690213292837 2023-01-22 08:00:06.280395: step: 104/77, loss: 0.009430565871298313 2023-01-22 08:00:07.523061: step: 108/77, loss: 0.00442750146612525 2023-01-22 08:00:08.868053: step: 112/77, loss: 0.0187971368432045 2023-01-22 08:00:10.257320: step: 116/77, loss: 0.0029458203352987766 2023-01-22 08:00:11.545683: step: 120/77, loss: 0.0011755856685340405 2023-01-22 08:00:12.834120: step: 124/77, loss: 0.05318979546427727 2023-01-22 08:00:14.131719: step: 128/77, loss: 0.07241583615541458 2023-01-22 08:00:15.451620: step: 132/77, loss: 0.03433309495449066 2023-01-22 08:00:16.774262: step: 136/77, loss: 0.006371957249939442 2023-01-22 08:00:18.064234: step: 140/77, loss: 0.00034351838985458016 2023-01-22 08:00:19.417305: step: 144/77, loss: 0.02569696307182312 2023-01-22 08:00:20.712829: step: 148/77, loss: 0.012018335051834583 2023-01-22 08:00:22.015528: step: 152/77, loss: 0.0008478729287162423 2023-01-22 08:00:23.298622: step: 156/77, loss: 0.00034335945383645594 2023-01-22 08:00:24.604352: step: 160/77, loss: 0.0005546339671127498 2023-01-22 08:00:25.907967: step: 164/77, loss: 0.007714861538261175 2023-01-22 08:00:27.297475: step: 168/77, loss: 0.024812553077936172 2023-01-22 08:00:28.603397: step: 172/77, loss: 0.032302480190992355 2023-01-22 08:00:29.915069: step: 176/77, loss: 0.054514043033123016 2023-01-22 08:00:31.227689: step: 180/77, loss: 0.0036903393920511007 2023-01-22 08:00:32.557550: step: 184/77, loss: 0.03750649094581604 2023-01-22 08:00:33.842852: step: 188/77, loss: 0.0182705819606781 2023-01-22 08:00:35.149227: step: 192/77, loss: 0.03227635845541954 2023-01-22 08:00:36.442593: step: 196/77, loss: 0.012850667349994183 2023-01-22 08:00:37.688811: step: 200/77, loss: 0.02886662445962429 2023-01-22 08:00:38.977303: step: 204/77, loss: 0.011203744448721409 2023-01-22 08:00:40.290545: step: 208/77, loss: 0.004409831017255783 2023-01-22 08:00:41.609492: step: 212/77, loss: 0.04057746380567551 2023-01-22 08:00:42.932837: step: 216/77, loss: 0.01268512848764658 2023-01-22 08:00:44.173468: step: 220/77, loss: 0.002502129413187504 2023-01-22 08:00:45.472922: step: 224/77, loss: 0.03674924001097679 2023-01-22 08:00:46.711686: step: 228/77, loss: 0.05796860530972481 2023-01-22 08:00:47.996340: step: 232/77, loss: 0.05133785307407379 2023-01-22 08:00:49.291689: step: 236/77, loss: 0.006198606453835964 2023-01-22 08:00:50.596673: step: 240/77, loss: 0.10119032859802246 2023-01-22 08:00:51.895852: step: 244/77, loss: 0.02887742966413498 2023-01-22 08:00:53.178607: step: 248/77, loss: 0.024643737822771072 2023-01-22 08:00:54.456345: step: 252/77, loss: 0.057729966938495636 2023-01-22 08:00:55.783572: step: 
256/77, loss: 0.029447536915540695 2023-01-22 08:00:57.113676: step: 260/77, loss: 0.06826838105916977 2023-01-22 08:00:58.449121: step: 264/77, loss: 0.005282876547425985 2023-01-22 08:00:59.765972: step: 268/77, loss: 0.03360510990023613 2023-01-22 08:01:01.068168: step: 272/77, loss: 0.0028833940159529448 2023-01-22 08:01:02.390648: step: 276/77, loss: 0.0013751761289313436 2023-01-22 08:01:03.688584: step: 280/77, loss: 0.023520752787590027 2023-01-22 08:01:04.996965: step: 284/77, loss: 0.003494781441986561 2023-01-22 08:01:06.326167: step: 288/77, loss: 0.01919829472899437 2023-01-22 08:01:07.631393: step: 292/77, loss: 0.017101947218179703 2023-01-22 08:01:08.931968: step: 296/77, loss: 0.04143914580345154 2023-01-22 08:01:10.232906: step: 300/77, loss: 0.005428193137049675 2023-01-22 08:01:11.547079: step: 304/77, loss: 0.012205487117171288 2023-01-22 08:01:12.850281: step: 308/77, loss: 0.01458023302257061 2023-01-22 08:01:14.200626: step: 312/77, loss: 0.03651302307844162 2023-01-22 08:01:15.526959: step: 316/77, loss: 0.006091257557272911 2023-01-22 08:01:16.882336: step: 320/77, loss: 0.006200199481099844 2023-01-22 08:01:18.202743: step: 324/77, loss: 0.03252246230840683 2023-01-22 08:01:19.550942: step: 328/77, loss: 0.017601091414690018 2023-01-22 08:01:20.886506: step: 332/77, loss: 0.002433488378301263 2023-01-22 08:01:22.206423: step: 336/77, loss: 0.06904677301645279 2023-01-22 08:01:23.508453: step: 340/77, loss: 0.006120236124843359 2023-01-22 08:01:24.826608: step: 344/77, loss: 0.007540901191532612 2023-01-22 08:01:26.139152: step: 348/77, loss: 0.009963840246200562 2023-01-22 08:01:27.437643: step: 352/77, loss: 0.003343338379636407 2023-01-22 08:01:28.719925: step: 356/77, loss: 0.034836817532777786 2023-01-22 08:01:30.074474: step: 360/77, loss: 0.01343780942261219 2023-01-22 08:01:31.384142: step: 364/77, loss: 0.00487480266019702 2023-01-22 08:01:32.677026: step: 368/77, loss: 0.025294139981269836 2023-01-22 08:01:33.963758: step: 372/77, loss: 0.06717772036790848 2023-01-22 08:01:35.326753: step: 376/77, loss: 0.01642705500125885 2023-01-22 08:01:36.629279: step: 380/77, loss: 0.010443812236189842 2023-01-22 08:01:37.947321: step: 384/77, loss: 0.00295252725481987 2023-01-22 08:01:39.256709: step: 388/77, loss: 0.027328480035066605 ================================================== Loss: 0.024 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Chinese: {'template': {'p': 0.9565217391304348, 'r': 0.5238095238095238, 'f1': 0.6769230769230768}, 'slot': {'p': 0.5714285714285714, 'r': 0.010309278350515464, 'f1': 0.020253164556962026}, 'combined': 0.01370983446932814, 'epoch': 7} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 7} Test Korean: {'template': {'p': 0.9558823529411765, 'r': 0.5158730158730159, 'f1': 0.6701030927835052}, 'slot': {'p': 0.5714285714285714, 'r': 0.010309278350515464, 'f1': 0.020253164556962026}, 'combined': 0.013571708208273523, 'epoch': 7} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 7} Test Russian: {'template': {'p': 0.9558823529411765, 'r': 
0.5158730158730159, 'f1': 0.6701030927835052}, 'slot': {'p': 0.5714285714285714, 'r': 0.010309278350515464, 'f1': 0.020253164556962026}, 'combined': 0.013571708208273523, 'epoch': 7} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 7} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 7} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 7} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 8 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:03:19.951633: step: 4/77, loss: 0.002951761707663536 2023-01-22 08:03:21.256870: step: 8/77, loss: 0.011420158669352531 2023-01-22 08:03:22.591924: step: 12/77, loss: 0.030093315988779068 2023-01-22 08:03:23.840117: step: 16/77, loss: 0.010848336853086948 2023-01-22 08:03:25.131830: step: 20/77, loss: 0.028082571923732758 2023-01-22 08:03:26.448854: step: 24/77, loss: 0.006531550548970699 2023-01-22 08:03:27.751398: step: 28/77, loss: 0.017639540135860443 2023-01-22 08:03:29.125185: step: 32/77, loss: 0.002651860937476158 2023-01-22 08:03:30.385231: step: 36/77, loss: 0.010134564712643623 2023-01-22 08:03:31.707361: 
step: 40/77, loss: 0.03160660341382027 2023-01-22 08:03:33.057584: step: 44/77, loss: 0.005230441689491272 2023-01-22 08:03:34.339698: step: 48/77, loss: 0.030870024114847183 2023-01-22 08:03:35.651675: step: 52/77, loss: 0.02877563051879406 2023-01-22 08:03:36.999554: step: 56/77, loss: 0.0063386764377355576 2023-01-22 08:03:38.310448: step: 60/77, loss: 0.03188091516494751 2023-01-22 08:03:39.593758: step: 64/77, loss: 0.010966446250677109 2023-01-22 08:03:40.924525: step: 68/77, loss: 0.002127768937498331 2023-01-22 08:03:42.235310: step: 72/77, loss: 0.0037651783786714077 2023-01-22 08:03:43.515033: step: 76/77, loss: 0.016201186925172806 2023-01-22 08:03:44.808669: step: 80/77, loss: 9.222197695635259e-05 2023-01-22 08:03:46.081591: step: 84/77, loss: 0.024649446830153465 2023-01-22 08:03:47.331913: step: 88/77, loss: 0.0013431230327114463 2023-01-22 08:03:48.650514: step: 92/77, loss: 0.011011897586286068 2023-01-22 08:03:49.963954: step: 96/77, loss: 0.0007366195786744356 2023-01-22 08:03:51.214865: step: 100/77, loss: 0.02774541825056076 2023-01-22 08:03:52.544474: step: 104/77, loss: 0.022737201303243637 2023-01-22 08:03:53.796414: step: 108/77, loss: 0.021142397075891495 2023-01-22 08:03:55.119505: step: 112/77, loss: 0.0162353478372097 2023-01-22 08:03:56.411849: step: 116/77, loss: 0.01265695784240961 2023-01-22 08:03:57.741277: step: 120/77, loss: 0.011829703114926815 2023-01-22 08:03:59.009294: step: 124/77, loss: 0.016484301537275314 2023-01-22 08:04:00.296560: step: 128/77, loss: 0.0007581686368212104 2023-01-22 08:04:01.631276: step: 132/77, loss: 0.0033396773505955935 2023-01-22 08:04:02.972951: step: 136/77, loss: 0.07784155011177063 2023-01-22 08:04:04.274563: step: 140/77, loss: 0.0069819167256355286 2023-01-22 08:04:05.542920: step: 144/77, loss: 0.12047009915113449 2023-01-22 08:04:06.827215: step: 148/77, loss: 0.003101084381341934 2023-01-22 08:04:08.138660: step: 152/77, loss: 0.005879676900804043 2023-01-22 08:04:09.458916: step: 156/77, loss: 0.012600190006196499 2023-01-22 08:04:10.777283: step: 160/77, loss: 0.034457940608263016 2023-01-22 08:04:12.091164: step: 164/77, loss: 0.001940029440447688 2023-01-22 08:04:13.396770: step: 168/77, loss: 0.018188534304499626 2023-01-22 08:04:14.691412: step: 172/77, loss: 0.019673490896821022 2023-01-22 08:04:15.962218: step: 176/77, loss: 0.004080579150468111 2023-01-22 08:04:17.237384: step: 180/77, loss: 0.013569517992436886 2023-01-22 08:04:18.514732: step: 184/77, loss: 0.0006934780394658446 2023-01-22 08:04:19.796235: step: 188/77, loss: 0.001094447448849678 2023-01-22 08:04:21.141985: step: 192/77, loss: 0.016190217807888985 2023-01-22 08:04:22.511995: step: 196/77, loss: 0.021863294765353203 2023-01-22 08:04:23.829615: step: 200/77, loss: 0.01695844903588295 2023-01-22 08:04:25.124470: step: 204/77, loss: 0.038087327033281326 2023-01-22 08:04:26.453028: step: 208/77, loss: 0.02763482555747032 2023-01-22 08:04:27.763196: step: 212/77, loss: 0.02110038697719574 2023-01-22 08:04:29.064843: step: 216/77, loss: 0.038932498544454575 2023-01-22 08:04:30.374079: step: 220/77, loss: 0.026587240397930145 2023-01-22 08:04:31.643533: step: 224/77, loss: 0.00013890476839151233 2023-01-22 08:04:32.959088: step: 228/77, loss: 0.10731155425310135 2023-01-22 08:04:34.231462: step: 232/77, loss: 0.008701834827661514 2023-01-22 08:04:35.524123: step: 236/77, loss: 0.0001420244516339153 2023-01-22 08:04:36.846725: step: 240/77, loss: 0.0021140193566679955 2023-01-22 08:04:38.160012: step: 244/77, loss: 0.00019444481586106122 
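
The step counter in the entries above advances by 4 between logged lines, which is consistent with the --accumulate_step 4 flag: the loss appears to be reported once per gradient-accumulation cycle, and the "Loss: ..." line printed at the end of each epoch is presumably an aggregate of these per-step values. Below is a minimal sketch of that logging pattern under those assumptions; the model, optimizer and data are dummies for illustration and are not taken from train.py.

    import torch
    from torch import nn

    ACCUMULATE_STEP = 4  # mirrors --accumulate_step 4 from the command line

    # Dummy model and data purely for illustration; the real train.py model is not shown here.
    model = nn.Linear(8, 1)
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-4)
    data = [(torch.randn(10, 8), torch.randn(10, 1)) for _ in range(12)]  # batch_size 10

    epoch_losses = []
    for step, (x, y) in enumerate(data, start=1):
        loss = nn.functional.mse_loss(model(x), y) / ACCUMULATE_STEP  # scale so gradients average over the cycle
        loss.backward()
        if step % ACCUMULATE_STEP == 0:
            optimizer.step()
            optimizer.zero_grad()
            epoch_losses.append(loss.item() * ACCUMULATE_STEP)
            # One such line per accumulation cycle, hence step numbers 4, 8, 12, ... in the log above.
            print(f"step: {step}, loss: {epoch_losses[-1]:.6f}")
    print(f"Loss: {sum(epoch_losses) / len(epoch_losses):.3f}")  # epoch summary, assumed to be a mean
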
2023-01-22 08:04:39.463286: step: 248/77, loss: 0.03828136622905731 2023-01-22 08:04:40.793381: step: 252/77, loss: 0.11219515651464462 2023-01-22 08:04:42.147809: step: 256/77, loss: 0.007098965346813202 2023-01-22 08:04:43.425938: step: 260/77, loss: 0.01880680024623871 2023-01-22 08:04:44.707416: step: 264/77, loss: 0.11024859547615051 2023-01-22 08:04:46.016838: step: 268/77, loss: 0.004602618515491486 2023-01-22 08:04:47.264750: step: 272/77, loss: 0.005424637347459793 2023-01-22 08:04:48.558913: step: 276/77, loss: 0.016098979860544205 2023-01-22 08:04:49.859537: step: 280/77, loss: 0.007525038905441761 2023-01-22 08:04:51.199566: step: 284/77, loss: 0.01623627543449402 2023-01-22 08:04:52.489318: step: 288/77, loss: 0.020228806883096695 2023-01-22 08:04:53.771875: step: 292/77, loss: 0.00013010915427003056 2023-01-22 08:04:55.090719: step: 296/77, loss: 0.001430353382602334 2023-01-22 08:04:56.389042: step: 300/77, loss: 0.004590929951518774 2023-01-22 08:04:57.699846: step: 304/77, loss: 0.007614678703248501 2023-01-22 08:04:59.032538: step: 308/77, loss: 0.028848322108387947 2023-01-22 08:05:00.305725: step: 312/77, loss: 0.012136640027165413 2023-01-22 08:05:01.564201: step: 316/77, loss: 0.0143202506005764 2023-01-22 08:05:02.835775: step: 320/77, loss: 0.0036574748810380697 2023-01-22 08:05:04.150660: step: 324/77, loss: 0.014181990176439285 2023-01-22 08:05:05.423272: step: 328/77, loss: 0.047021135687828064 2023-01-22 08:05:06.720814: step: 332/77, loss: 0.0044871168211102486 2023-01-22 08:05:07.985118: step: 336/77, loss: 0.012777280993759632 2023-01-22 08:05:09.310509: step: 340/77, loss: 0.011580890975892544 2023-01-22 08:05:10.639591: step: 344/77, loss: 0.01588420197367668 2023-01-22 08:05:11.934141: step: 348/77, loss: 0.09475565701723099 2023-01-22 08:05:13.214826: step: 352/77, loss: 0.02808484062552452 2023-01-22 08:05:14.505729: step: 356/77, loss: 0.006215309724211693 2023-01-22 08:05:15.818527: step: 360/77, loss: 0.005064602941274643 2023-01-22 08:05:17.155006: step: 364/77, loss: 0.015820614993572235 2023-01-22 08:05:18.432587: step: 368/77, loss: 0.02967149205505848 2023-01-22 08:05:19.768362: step: 372/77, loss: 0.008606133982539177 2023-01-22 08:05:21.069736: step: 376/77, loss: 0.05700630322098732 2023-01-22 08:05:22.366573: step: 380/77, loss: 0.012348588556051254 2023-01-22 08:05:23.681919: step: 384/77, loss: 0.0003217856865376234 2023-01-22 08:05:25.013230: step: 388/77, loss: 0.01081857644021511 ================================================== Loss: 0.020 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5555555555555556, 'f1': 0.7000000000000001}, 'slot': {'p': 0.625, 'r': 0.01718213058419244, 'f1': 0.033444816053511704}, 'combined': 0.023411371237458196, 'epoch': 8} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Korean: {'template': {'p': 0.9459459459459459, 'r': 0.5555555555555556, 'f1': 0.7000000000000001}, 'slot': {'p': 0.6363636363636364, 'r': 0.01804123711340206, 'f1': 0.035087719298245605}, 'combined': 0.024561403508771926, 'epoch': 8} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 8} Test Russian: {'template': {'p': 0.9459459459459459, 'r': 0.5555555555555556, 'f1': 0.7000000000000001}, 'slot': {'p': 0.625, 'r': 0.01718213058419244, 'f1': 0.033444816053511704}, 'combined': 0.023411371237458196, 'epoch': 8} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 8} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 8} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 8} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 9 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:07:05.456030: step: 4/77, loss: 0.008130619302392006 2023-01-22 08:07:06.749555: step: 8/77, loss: 0.0005391405429691076 2023-01-22 08:07:08.040697: step: 12/77, loss: 0.011194856837391853 2023-01-22 08:07:09.322456: step: 16/77, loss: 0.007317467126995325 2023-01-22 08:07:10.670986: step: 20/77, loss: 0.0438869409263134 2023-01-22 08:07:11.944605: step: 24/77, loss: 0.008051110431551933 2023-01-22 08:07:13.232243: step: 28/77, loss: 0.012669816613197327 2023-01-22 
08:07:14.479391: step: 32/77, loss: 0.0022855617571622133 2023-01-22 08:07:15.764071: step: 36/77, loss: 0.0064675528556108475 2023-01-22 08:07:17.032160: step: 40/77, loss: 0.001550829503685236 2023-01-22 08:07:18.384813: step: 44/77, loss: 0.01563188061118126 2023-01-22 08:07:19.664601: step: 48/77, loss: 0.008327081799507141 2023-01-22 08:07:20.940261: step: 52/77, loss: 0.008023286238312721 2023-01-22 08:07:22.253986: step: 56/77, loss: 0.003052850253880024 2023-01-22 08:07:23.534857: step: 60/77, loss: 0.04474394768476486 2023-01-22 08:07:24.840960: step: 64/77, loss: 0.009582719765603542 2023-01-22 08:07:26.163215: step: 68/77, loss: 0.01214680913835764 2023-01-22 08:07:27.451096: step: 72/77, loss: 0.0022618744987994432 2023-01-22 08:07:28.756785: step: 76/77, loss: 0.03842214122414589 2023-01-22 08:07:30.034648: step: 80/77, loss: 0.12139374017715454 2023-01-22 08:07:31.401473: step: 84/77, loss: 0.0024616678711026907 2023-01-22 08:07:32.680419: step: 88/77, loss: 0.009868036024272442 2023-01-22 08:07:34.022933: step: 92/77, loss: 0.003964670468121767 2023-01-22 08:07:35.334081: step: 96/77, loss: 0.019232330843806267 2023-01-22 08:07:36.602313: step: 100/77, loss: 0.011210390366613865 2023-01-22 08:07:37.899147: step: 104/77, loss: 0.05151809751987457 2023-01-22 08:07:39.182120: step: 108/77, loss: 0.06597165763378143 2023-01-22 08:07:40.462361: step: 112/77, loss: 0.003417958738282323 2023-01-22 08:07:41.729933: step: 116/77, loss: 0.0020162255968898535 2023-01-22 08:07:43.010622: step: 120/77, loss: 0.03625112771987915 2023-01-22 08:07:44.327395: step: 124/77, loss: 0.021857159212231636 2023-01-22 08:07:45.628149: step: 128/77, loss: 0.015903670340776443 2023-01-22 08:07:46.893490: step: 132/77, loss: 0.004454844631254673 2023-01-22 08:07:48.262460: step: 136/77, loss: 0.011321873404085636 2023-01-22 08:07:49.560257: step: 140/77, loss: 0.07448364049196243 2023-01-22 08:07:50.825053: step: 144/77, loss: 0.025326918810606003 2023-01-22 08:07:52.136852: step: 148/77, loss: 0.00021000817650929093 2023-01-22 08:07:53.463199: step: 152/77, loss: 0.0007694442756474018 2023-01-22 08:07:54.776000: step: 156/77, loss: 0.08295883983373642 2023-01-22 08:07:56.067110: step: 160/77, loss: 0.0010246189776808023 2023-01-22 08:07:57.401807: step: 164/77, loss: 0.016965247690677643 2023-01-22 08:07:58.703013: step: 168/77, loss: 0.011164311319589615 2023-01-22 08:08:00.023029: step: 172/77, loss: 0.0428466722369194 2023-01-22 08:08:01.341690: step: 176/77, loss: 0.043888527899980545 2023-01-22 08:08:02.715545: step: 180/77, loss: 0.03227125480771065 2023-01-22 08:08:03.983076: step: 184/77, loss: 0.01470979955047369 2023-01-22 08:08:05.297040: step: 188/77, loss: 0.007358902599662542 2023-01-22 08:08:06.583923: step: 192/77, loss: 0.02829001471400261 2023-01-22 08:08:07.844244: step: 196/77, loss: 0.006120836362242699 2023-01-22 08:08:09.131136: step: 200/77, loss: 0.017666509374976158 2023-01-22 08:08:10.443211: step: 204/77, loss: 0.007887563668191433 2023-01-22 08:08:11.772406: step: 208/77, loss: 0.07387572526931763 2023-01-22 08:08:13.081327: step: 212/77, loss: 0.00548297306522727 2023-01-22 08:08:14.358749: step: 216/77, loss: 0.14394663274288177 2023-01-22 08:08:15.671915: step: 220/77, loss: 0.0017406389815732837 2023-01-22 08:08:16.996205: step: 224/77, loss: 0.008252009749412537 2023-01-22 08:08:18.299587: step: 228/77, loss: 0.005103731993585825 2023-01-22 08:08:19.608638: step: 232/77, loss: 0.015911363065242767 2023-01-22 08:08:20.918399: step: 236/77, loss: 0.0036538285203278065 
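
Each step entry above follows a fixed shape (ISO timestamp, "step: <n>/<total>", and a float loss), so the run can be post-processed with a simple regular expression. The parser below is an illustrative helper written against the format visible in this log, not part of train.py:

    import re

    STEP_RE = re.compile(
        r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+): step: (\d+)/(\d+), loss: ([0-9.eE+-]+)"
    )

    def parse_steps(log_text):
        # Yield (timestamp, step, total, loss) tuples for every step entry in the text.
        for ts, step, total, loss in STEP_RE.findall(log_text):
            yield ts, int(step), int(total), float(loss)

    sample = "2023-01-22 08:07:30.034648: step: 80/77, loss: 0.12139374017715454"
    print(list(parse_steps(sample)))
    # [('2023-01-22 08:07:30.034648', 80, 77, 0.12139374017715454)]
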
2023-01-22 08:08:22.253679: step: 240/77, loss: 0.0744553878903389 2023-01-22 08:08:23.542108: step: 244/77, loss: 0.0005846361164003611 2023-01-22 08:08:24.818831: step: 248/77, loss: 0.003879360156133771 2023-01-22 08:08:26.181549: step: 252/77, loss: 4.1820159822236747e-05 2023-01-22 08:08:27.481013: step: 256/77, loss: 0.05694466084241867 2023-01-22 08:08:28.772367: step: 260/77, loss: 0.00016159679216798395 2023-01-22 08:08:30.121294: step: 264/77, loss: 0.003600452793762088 2023-01-22 08:08:31.412354: step: 268/77, loss: 0.005282798781991005 2023-01-22 08:08:32.740569: step: 272/77, loss: 0.1386595070362091 2023-01-22 08:08:34.045890: step: 276/77, loss: 0.10414771735668182 2023-01-22 08:08:35.407312: step: 280/77, loss: 0.006839843932539225 2023-01-22 08:08:36.717906: step: 284/77, loss: 0.0015076743438839912 2023-01-22 08:08:38.060298: step: 288/77, loss: 0.004393836483359337 2023-01-22 08:08:39.380416: step: 292/77, loss: 0.007599594071507454 2023-01-22 08:08:40.712226: step: 296/77, loss: 0.03950433060526848 2023-01-22 08:08:42.009802: step: 300/77, loss: 0.009844477288424969 2023-01-22 08:08:43.333755: step: 304/77, loss: 0.0013913114089518785 2023-01-22 08:08:44.611193: step: 308/77, loss: 0.12179585546255112 2023-01-22 08:08:45.916748: step: 312/77, loss: 0.005494131240993738 2023-01-22 08:08:47.210497: step: 316/77, loss: 0.09123729169368744 2023-01-22 08:08:48.536109: step: 320/77, loss: 0.011966943740844727 2023-01-22 08:08:49.867562: step: 324/77, loss: 0.00355300260707736 2023-01-22 08:08:51.220179: step: 328/77, loss: 0.06924528628587723 2023-01-22 08:08:52.562939: step: 332/77, loss: 0.13030168414115906 2023-01-22 08:08:53.886843: step: 336/77, loss: 0.04786805436015129 2023-01-22 08:08:55.200172: step: 340/77, loss: 0.05078596621751785 2023-01-22 08:08:56.575641: step: 344/77, loss: 0.011326906271278858 2023-01-22 08:08:57.876388: step: 348/77, loss: 0.028522998094558716 2023-01-22 08:08:59.222439: step: 352/77, loss: 0.06331083178520203 2023-01-22 08:09:00.533843: step: 356/77, loss: 0.003408285556361079 2023-01-22 08:09:01.875938: step: 360/77, loss: 0.011288869194686413 2023-01-22 08:09:03.202735: step: 364/77, loss: 0.030668139457702637 2023-01-22 08:09:04.494766: step: 368/77, loss: 0.0004308921634219587 2023-01-22 08:09:05.777915: step: 372/77, loss: 0.00032320560421794653 2023-01-22 08:09:07.066000: step: 376/77, loss: 0.045046959072351456 2023-01-22 08:09:08.407302: step: 380/77, loss: 0.11515285074710846 2023-01-22 08:09:09.715253: step: 384/77, loss: 0.0003654747852124274 2023-01-22 08:09:11.076166: step: 388/77, loss: 0.021653490141034126 ================================================== Loss: 0.027 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 9} Test Chinese: {'template': {'p': 0.984375, 'r': 0.5, 'f1': 0.6631578947368421}, 'slot': {'p': 0.72, 'r': 0.015463917525773196, 'f1': 0.030277544154751895}, 'combined': 0.020078792439467044, 'epoch': 9} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 9} Test Korean: {'template': {'p': 0.9841269841269841, 'r': 0.49206349206349204, 'f1': 0.656084656084656}, 'slot': {'p': 0.72, 'r': 0.015463917525773196, 'f1': 0.030277544154751895}, 'combined': 0.019864632143858384, 'epoch': 9} Dev Russian: 
{'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 9} Test Russian: {'template': {'p': 0.9841269841269841, 'r': 0.49206349206349204, 'f1': 0.656084656084656}, 'slot': {'p': 0.72, 'r': 0.015463917525773196, 'f1': 0.030277544154751895}, 'combined': 0.019864632143858384, 'epoch': 9} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 9} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 9} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 9} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 10 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:10:51.723524: step: 4/77, loss: 0.028790917247533798 2023-01-22 08:10:53.009180: step: 8/77, loss: 0.006226943340152502 2023-01-22 08:10:54.333127: step: 12/77, loss: 0.004163344856351614 2023-01-22 08:10:55.610429: step: 16/77, loss: 0.0036746724508702755 2023-01-22 08:10:56.915946: step: 20/77, loss: 0.052779559046030045 2023-01-22 08:10:58.219924: step: 24/77, loss: 0.04778440296649933 
2023-01-22 08:10:59.486005: step: 28/77, loss: 0.023326139897108078 2023-01-22 08:11:00.739529: step: 32/77, loss: 0.0023849578574299812 2023-01-22 08:11:02.007803: step: 36/77, loss: 0.012747220695018768 2023-01-22 08:11:03.269024: step: 40/77, loss: 0.0034248887095600367 2023-01-22 08:11:04.578087: step: 44/77, loss: 0.0010681729763746262 2023-01-22 08:11:05.834312: step: 48/77, loss: 0.007092490326613188 2023-01-22 08:11:07.117416: step: 52/77, loss: 0.028929613530635834 2023-01-22 08:11:08.390044: step: 56/77, loss: 0.003579456824809313 2023-01-22 08:11:09.740786: step: 60/77, loss: 0.007527029141783714 2023-01-22 08:11:11.045676: step: 64/77, loss: 0.031638361513614655 2023-01-22 08:11:12.313132: step: 68/77, loss: 0.02865629829466343 2023-01-22 08:11:13.576926: step: 72/77, loss: 0.03119928203523159 2023-01-22 08:11:14.906843: step: 76/77, loss: 0.0003292355395387858 2023-01-22 08:11:16.175812: step: 80/77, loss: 0.010797802358865738 2023-01-22 08:11:17.469813: step: 84/77, loss: 0.0030798588413745165 2023-01-22 08:11:18.735543: step: 88/77, loss: 0.0003887184429913759 2023-01-22 08:11:20.009817: step: 92/77, loss: 0.009819199331104755 2023-01-22 08:11:21.341748: step: 96/77, loss: 0.0035490067675709724 2023-01-22 08:11:22.672297: step: 100/77, loss: 0.0005593973910436034 2023-01-22 08:11:23.970420: step: 104/77, loss: 0.004988205153495073 2023-01-22 08:11:25.306036: step: 108/77, loss: 0.0018749493174254894 2023-01-22 08:11:26.597805: step: 112/77, loss: 0.01050441525876522 2023-01-22 08:11:27.912979: step: 116/77, loss: 0.0016917268512770534 2023-01-22 08:11:29.236793: step: 120/77, loss: 0.013261470943689346 2023-01-22 08:11:30.534928: step: 124/77, loss: 0.0017507218290120363 2023-01-22 08:11:31.887876: step: 128/77, loss: 0.006236909423023462 2023-01-22 08:11:33.183641: step: 132/77, loss: 0.021041784435510635 2023-01-22 08:11:34.451069: step: 136/77, loss: 0.03406795114278793 2023-01-22 08:11:35.733389: step: 140/77, loss: 0.026158465072512627 2023-01-22 08:11:37.019993: step: 144/77, loss: 0.002985805505886674 2023-01-22 08:11:38.324752: step: 148/77, loss: 0.004404544830322266 2023-01-22 08:11:39.633528: step: 152/77, loss: 0.004968182649463415 2023-01-22 08:11:40.959085: step: 156/77, loss: 0.03685709089040756 2023-01-22 08:11:42.261332: step: 160/77, loss: 0.002346716821193695 2023-01-22 08:11:43.562414: step: 164/77, loss: 6.125450454419479e-05 2023-01-22 08:11:44.922839: step: 168/77, loss: 0.006016855128109455 2023-01-22 08:11:46.294545: step: 172/77, loss: 0.01733742654323578 2023-01-22 08:11:47.647135: step: 176/77, loss: 0.08007363975048065 2023-01-22 08:11:48.955874: step: 180/77, loss: 0.013454221189022064 2023-01-22 08:11:50.262250: step: 184/77, loss: 0.023909416049718857 2023-01-22 08:11:51.548093: step: 188/77, loss: 0.01868433691561222 2023-01-22 08:11:52.812334: step: 192/77, loss: 0.0005307840183377266 2023-01-22 08:11:54.122080: step: 196/77, loss: 0.01805320382118225 2023-01-22 08:11:55.409918: step: 200/77, loss: 0.0010465634986758232 2023-01-22 08:11:56.708804: step: 204/77, loss: 0.03729572519659996 2023-01-22 08:11:57.984425: step: 208/77, loss: 0.014979223720729351 2023-01-22 08:11:59.252356: step: 212/77, loss: 0.03879925236105919 2023-01-22 08:12:00.537791: step: 216/77, loss: 0.02371007576584816 2023-01-22 08:12:01.815430: step: 220/77, loss: 0.007413984276354313 2023-01-22 08:12:03.133555: step: 224/77, loss: 4.5996031985851005e-05 2023-01-22 08:12:04.435769: step: 228/77, loss: 0.009440948255360126 2023-01-22 08:12:05.707977: step: 232/77, loss: 
0.02789357490837574 2023-01-22 08:12:06.989284: step: 236/77, loss: 0.0020327758975327015 2023-01-22 08:12:08.288132: step: 240/77, loss: 0.005369645543396473 2023-01-22 08:12:09.541229: step: 244/77, loss: 0.010713733732700348 2023-01-22 08:12:10.857439: step: 248/77, loss: 0.0014789579436182976 2023-01-22 08:12:12.150197: step: 252/77, loss: 0.00473722442984581 2023-01-22 08:12:13.444582: step: 256/77, loss: 0.014124809764325619 2023-01-22 08:12:14.768936: step: 260/77, loss: 0.01643744297325611 2023-01-22 08:12:16.073516: step: 264/77, loss: 0.0024889023043215275 2023-01-22 08:12:17.401581: step: 268/77, loss: 0.003109875600785017 2023-01-22 08:12:18.664923: step: 272/77, loss: 0.012875061482191086 2023-01-22 08:12:19.986533: step: 276/77, loss: 0.0017921538092195988 2023-01-22 08:12:21.294361: step: 280/77, loss: 0.01875830627977848 2023-01-22 08:12:22.605467: step: 284/77, loss: 0.003374907886609435 2023-01-22 08:12:23.913362: step: 288/77, loss: 0.0016585986595600843 2023-01-22 08:12:25.242026: step: 292/77, loss: 0.00521429255604744 2023-01-22 08:12:26.569897: step: 296/77, loss: 0.019124671816825867 2023-01-22 08:12:27.904790: step: 300/77, loss: 0.009553453885018826 2023-01-22 08:12:29.216403: step: 304/77, loss: 0.0073772757314145565 2023-01-22 08:12:30.513611: step: 308/77, loss: 0.011843051761388779 2023-01-22 08:12:31.816592: step: 312/77, loss: 0.03154919296503067 2023-01-22 08:12:33.169532: step: 316/77, loss: 0.06556575000286102 2023-01-22 08:12:34.476329: step: 320/77, loss: 0.04244726896286011 2023-01-22 08:12:35.799665: step: 324/77, loss: 0.006225625053048134 2023-01-22 08:12:37.102020: step: 328/77, loss: 0.0034917141310870647 2023-01-22 08:12:38.385470: step: 332/77, loss: 0.010496101342141628 2023-01-22 08:12:39.722972: step: 336/77, loss: 0.024124911054968834 2023-01-22 08:12:41.005511: step: 340/77, loss: 0.007168466225266457 2023-01-22 08:12:42.303159: step: 344/77, loss: 0.004895597230643034 2023-01-22 08:12:43.579315: step: 348/77, loss: 0.06963392347097397 2023-01-22 08:12:44.854382: step: 352/77, loss: 0.025621812790632248 2023-01-22 08:12:46.186825: step: 356/77, loss: 0.010994499549269676 2023-01-22 08:12:47.476933: step: 360/77, loss: 0.0767049491405487 2023-01-22 08:12:48.783715: step: 364/77, loss: 0.02383904531598091 2023-01-22 08:12:50.141375: step: 368/77, loss: 0.004664411302655935 2023-01-22 08:12:51.451535: step: 372/77, loss: 0.009664268232882023 2023-01-22 08:12:52.747513: step: 376/77, loss: 0.04587193951010704 2023-01-22 08:12:54.058433: step: 380/77, loss: 0.0028117981273680925 2023-01-22 08:12:55.339610: step: 384/77, loss: 0.00421349611133337 2023-01-22 08:12:56.648110: step: 388/77, loss: 0.008075060322880745 ================================================== Loss: 0.015 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Chinese: {'template': {'p': 0.9538461538461539, 'r': 0.49206349206349204, 'f1': 0.6492146596858639}, 'slot': {'p': 0.625, 'r': 0.01718213058419244, 'f1': 0.033444816053511704}, 'combined': 0.02171286487243692, 'epoch': 10} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Korean: {'template': {'p': 0.9538461538461539, 'r': 0.49206349206349204, 'f1': 0.6492146596858639}, 'slot': {'p': 
0.65625, 'r': 0.01804123711340206, 'f1': 0.03511705685618729}, 'combined': 0.022798508116058765, 'epoch': 10} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 10} Test Russian: {'template': {'p': 0.953125, 'r': 0.48412698412698413, 'f1': 0.6421052631578947}, 'slot': {'p': 0.625, 'r': 0.01718213058419244, 'f1': 0.033444816053511704}, 'combined': 0.021475092413307514, 'epoch': 10} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 10} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 10} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 10} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 11 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:14:36.498334: step: 4/77, loss: 0.015952421352267265 2023-01-22 08:14:37.781733: step: 8/77, loss: 0.00025507123791612685 2023-01-22 08:14:39.066992: step: 12/77, loss: 0.0044345613569021225 2023-01-22 08:14:40.333845: step: 16/77, loss: 0.0034365570172667503 
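
In the evaluation dicts above, every 'f1' is the harmonic mean of the listed 'p' and 'r', and every 'combined' value equals the template F1 multiplied by the slot F1 (e.g. 0.7368421052631579 * 0.07029876977152899 = 0.05179909351586346 in the Dev rows). The helpers below reconstruct those numbers from the logged precision/recall; they are inferred from the values in this log rather than copied from the evaluation code.

    def f1(p, r):
        # Harmonic mean of precision and recall, matching the 'f1' fields above.
        return 0.0 if p + r == 0 else 2 * p * r / (p + r)

    def combined(template_f1, slot_f1):
        # The 'combined' field is consistent with template F1 * slot F1 throughout the log.
        return template_f1 * slot_f1

    t = f1(1.0, 0.5833333333333334)      # -> 0.7368421052631579
    s = f1(0.5, 0.03780718336483932)     # -> 0.07029876977152899
    assert abs(combined(t, s) - 0.05179909351586346) < 1e-9
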
2023-01-22 08:14:41.605474: step: 20/77, loss: 0.00975167378783226 2023-01-22 08:14:42.919190: step: 24/77, loss: 0.0033859300892800093 2023-01-22 08:14:44.250208: step: 28/77, loss: 0.0035383105278015137 2023-01-22 08:14:45.575888: step: 32/77, loss: 0.02958233840763569 2023-01-22 08:14:46.883146: step: 36/77, loss: 0.006568643264472485 2023-01-22 08:14:48.194213: step: 40/77, loss: 0.007371163927018642 2023-01-22 08:14:49.460894: step: 44/77, loss: 0.026065394282341003 2023-01-22 08:14:50.742642: step: 48/77, loss: 0.12206071615219116 2023-01-22 08:14:52.034178: step: 52/77, loss: 0.019574757665395737 2023-01-22 08:14:53.294846: step: 56/77, loss: 0.004011745098978281 2023-01-22 08:14:54.638373: step: 60/77, loss: 0.00427105650305748 2023-01-22 08:14:55.989101: step: 64/77, loss: 0.045897383242845535 2023-01-22 08:14:57.282296: step: 68/77, loss: 0.0007205940200947225 2023-01-22 08:14:58.611060: step: 72/77, loss: 0.014391254633665085 2023-01-22 08:14:59.912734: step: 76/77, loss: 0.00022145872935652733 2023-01-22 08:15:01.238192: step: 80/77, loss: 0.008357521146535873 2023-01-22 08:15:02.563072: step: 84/77, loss: 0.04798451066017151 2023-01-22 08:15:03.852371: step: 88/77, loss: 0.019816333428025246 2023-01-22 08:15:05.083944: step: 92/77, loss: 0.030351610854268074 2023-01-22 08:15:06.382363: step: 96/77, loss: 0.002286176895722747 2023-01-22 08:15:07.723293: step: 100/77, loss: 0.02866881527006626 2023-01-22 08:15:08.992324: step: 104/77, loss: 0.01667320542037487 2023-01-22 08:15:10.294289: step: 108/77, loss: 0.01816210336983204 2023-01-22 08:15:11.592005: step: 112/77, loss: 0.009158037602901459 2023-01-22 08:15:12.900543: step: 116/77, loss: 0.023456105962395668 2023-01-22 08:15:14.179990: step: 120/77, loss: 0.001066570752300322 2023-01-22 08:15:15.484515: step: 124/77, loss: 0.02048538438975811 2023-01-22 08:15:16.794200: step: 128/77, loss: 0.01709677465260029 2023-01-22 08:15:18.110909: step: 132/77, loss: 0.00894573051482439 2023-01-22 08:15:19.439787: step: 136/77, loss: 0.00696770241484046 2023-01-22 08:15:20.717797: step: 140/77, loss: 0.019813766703009605 2023-01-22 08:15:22.106235: step: 144/77, loss: 0.013972686603665352 2023-01-22 08:15:23.451791: step: 148/77, loss: 0.025065403431653976 2023-01-22 08:15:24.774058: step: 152/77, loss: 0.011014866642653942 2023-01-22 08:15:26.066871: step: 156/77, loss: 0.02622900903224945 2023-01-22 08:15:27.323220: step: 160/77, loss: 0.0027283949311822653 2023-01-22 08:15:28.600065: step: 164/77, loss: 2.7721842343453318e-05 2023-01-22 08:15:29.917602: step: 168/77, loss: 0.025296106934547424 2023-01-22 08:15:31.178047: step: 172/77, loss: 0.004019709303975105 2023-01-22 08:15:32.494782: step: 176/77, loss: 0.009642662480473518 2023-01-22 08:15:33.785096: step: 180/77, loss: 0.007437935099005699 2023-01-22 08:15:35.105605: step: 184/77, loss: 0.00015852319484110922 2023-01-22 08:15:36.434324: step: 188/77, loss: 0.008070101030170918 2023-01-22 08:15:37.741137: step: 192/77, loss: 0.005517785437405109 2023-01-22 08:15:39.062263: step: 196/77, loss: 0.009717223234474659 2023-01-22 08:15:40.345718: step: 200/77, loss: 0.16438964009284973 2023-01-22 08:15:41.660870: step: 204/77, loss: 0.0046204449608922005 2023-01-22 08:15:42.934926: step: 208/77, loss: 0.004777129739522934 2023-01-22 08:15:44.257189: step: 212/77, loss: 0.0024582187179476023 2023-01-22 08:15:45.528013: step: 216/77, loss: 0.0029221922159194946 2023-01-22 08:15:46.864724: step: 220/77, loss: 0.032576870173215866 2023-01-22 08:15:48.161780: step: 224/77, loss: 
0.019355500116944313 2023-01-22 08:15:49.505276: step: 228/77, loss: 0.013398583978414536 2023-01-22 08:15:50.870983: step: 232/77, loss: 5.370080907596275e-05 2023-01-22 08:15:52.161869: step: 236/77, loss: 0.014454700984060764 2023-01-22 08:15:53.448549: step: 240/77, loss: 0.05621044710278511 2023-01-22 08:15:54.717895: step: 244/77, loss: 0.009606147184967995 2023-01-22 08:15:55.991506: step: 248/77, loss: 0.007845384068787098 2023-01-22 08:15:57.350337: step: 252/77, loss: 0.009262886829674244 2023-01-22 08:15:58.668913: step: 256/77, loss: 0.08676369488239288 2023-01-22 08:15:59.968884: step: 260/77, loss: 0.01982366479933262 2023-01-22 08:16:01.299916: step: 264/77, loss: 0.06730242818593979 2023-01-22 08:16:02.671483: step: 268/77, loss: 0.0041349404491484165 2023-01-22 08:16:03.955769: step: 272/77, loss: 0.004261372610926628 2023-01-22 08:16:05.299828: step: 276/77, loss: 0.015395049005746841 2023-01-22 08:16:06.595678: step: 280/77, loss: 0.005712614394724369 2023-01-22 08:16:07.954243: step: 284/77, loss: 0.00669367890805006 2023-01-22 08:16:09.254809: step: 288/77, loss: 0.03226980194449425 2023-01-22 08:16:10.542989: step: 292/77, loss: 0.00047052899026311934 2023-01-22 08:16:11.862506: step: 296/77, loss: 0.005096997134387493 2023-01-22 08:16:13.176565: step: 300/77, loss: 0.03474399447441101 2023-01-22 08:16:14.434862: step: 304/77, loss: 0.00014382918016053736 2023-01-22 08:16:15.754840: step: 308/77, loss: 0.0004964787513017654 2023-01-22 08:16:17.036301: step: 312/77, loss: 0.014115167781710625 2023-01-22 08:16:18.339743: step: 316/77, loss: 0.08715243637561798 2023-01-22 08:16:19.662164: step: 320/77, loss: 0.007384343538433313 2023-01-22 08:16:20.959485: step: 324/77, loss: 0.05926787108182907 2023-01-22 08:16:22.292735: step: 328/77, loss: 0.0005082663847133517 2023-01-22 08:16:23.593826: step: 332/77, loss: 0.008011923171579838 2023-01-22 08:16:24.971317: step: 336/77, loss: 0.02457595057785511 2023-01-22 08:16:26.301035: step: 340/77, loss: 0.05150618776679039 2023-01-22 08:16:27.618927: step: 344/77, loss: 0.010762691497802734 2023-01-22 08:16:28.935437: step: 348/77, loss: 0.016093550249934196 2023-01-22 08:16:30.237352: step: 352/77, loss: 0.002383376471698284 2023-01-22 08:16:31.528950: step: 356/77, loss: 0.005355801433324814 2023-01-22 08:16:32.845738: step: 360/77, loss: 0.011093618348240852 2023-01-22 08:16:34.168518: step: 364/77, loss: 7.855845615267754e-05 2023-01-22 08:16:35.499078: step: 368/77, loss: 7.87553217378445e-05 2023-01-22 08:16:36.862056: step: 372/77, loss: 0.00825677439570427 2023-01-22 08:16:38.205263: step: 376/77, loss: 0.051966484636068344 2023-01-22 08:16:39.543157: step: 380/77, loss: 0.01855085790157318 2023-01-22 08:16:40.850445: step: 384/77, loss: 0.02410942129790783 2023-01-22 08:16:42.170173: step: 388/77, loss: 8.27374606160447e-05 ================================================== Loss: 0.018 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Chinese: {'template': {'p': 0.9583333333333334, 'r': 0.5476190476190477, 'f1': 0.696969696969697}, 'slot': {'p': 0.5882352941176471, 'r': 0.01718213058419244, 'f1': 0.0333889816360601}, 'combined': 0.023271108413011585, 'epoch': 11} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 
0.05179909351586346, 'epoch': 11} Test Korean: {'template': {'p': 0.9583333333333334, 'r': 0.5476190476190477, 'f1': 0.696969696969697}, 'slot': {'p': 0.5882352941176471, 'r': 0.01718213058419244, 'f1': 0.0333889816360601}, 'combined': 0.023271108413011585, 'epoch': 11} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 11} Test Russian: {'template': {'p': 0.9583333333333334, 'r': 0.5476190476190477, 'f1': 0.696969696969697}, 'slot': {'p': 0.5882352941176471, 'r': 0.01718213058419244, 'f1': 0.0333889816360601}, 'combined': 0.023271108413011585, 'epoch': 11} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 11} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 11} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 11} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 12 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:18:23.213169: step: 4/77, loss: 0.0001822328777052462 2023-01-22 08:18:24.507060: 
step: 8/77, loss: 0.0008595406543463469 2023-01-22 08:18:25.783152: step: 12/77, loss: 0.0023720674216747284 2023-01-22 08:18:27.052747: step: 16/77, loss: 0.0035153913777321577 2023-01-22 08:18:28.359934: step: 20/77, loss: 0.0032651075161993504 2023-01-22 08:18:29.667849: step: 24/77, loss: 0.006483836565166712 2023-01-22 08:18:30.965380: step: 28/77, loss: 0.0008740816847421229 2023-01-22 08:18:32.330319: step: 32/77, loss: 0.03994395583868027 2023-01-22 08:18:33.630288: step: 36/77, loss: 0.000109150554635562 2023-01-22 08:18:34.913121: step: 40/77, loss: 0.000269919604761526 2023-01-22 08:18:36.223136: step: 44/77, loss: 0.012205511331558228 2023-01-22 08:18:37.522120: step: 48/77, loss: 0.001035436987876892 2023-01-22 08:18:38.796275: step: 52/77, loss: 0.018521424382925034 2023-01-22 08:18:40.070203: step: 56/77, loss: 0.013126850128173828 2023-01-22 08:18:41.365470: step: 60/77, loss: 0.005567711777985096 2023-01-22 08:18:42.659040: step: 64/77, loss: 0.03030678629875183 2023-01-22 08:18:43.952580: step: 68/77, loss: 0.003879829775542021 2023-01-22 08:18:45.240205: step: 72/77, loss: 0.025746623054146767 2023-01-22 08:18:46.576569: step: 76/77, loss: 0.011330823414027691 2023-01-22 08:18:47.873931: step: 80/77, loss: 0.0010732869850471616 2023-01-22 08:18:49.218052: step: 84/77, loss: 0.06254440546035767 2023-01-22 08:18:50.540351: step: 88/77, loss: 0.000326181179843843 2023-01-22 08:18:51.814778: step: 92/77, loss: 0.053866542875766754 2023-01-22 08:18:53.113990: step: 96/77, loss: 0.002246356336399913 2023-01-22 08:18:54.374614: step: 100/77, loss: 0.012330463156104088 2023-01-22 08:18:55.632917: step: 104/77, loss: 0.03553637117147446 2023-01-22 08:18:56.872999: step: 108/77, loss: 0.00018002027354668826 2023-01-22 08:18:58.140789: step: 112/77, loss: 0.023753199726343155 2023-01-22 08:18:59.433536: step: 116/77, loss: 0.001026954036206007 2023-01-22 08:19:00.753037: step: 120/77, loss: 0.022113390266895294 2023-01-22 08:19:02.101045: step: 124/77, loss: 0.0218508280813694 2023-01-22 08:19:03.363938: step: 128/77, loss: 0.0010070583084598184 2023-01-22 08:19:04.659636: step: 132/77, loss: 0.00040107598761096597 2023-01-22 08:19:05.976435: step: 136/77, loss: 0.04117031395435333 2023-01-22 08:19:07.233725: step: 140/77, loss: 0.00023497387883253396 2023-01-22 08:19:08.519444: step: 144/77, loss: 0.014612888917326927 2023-01-22 08:19:09.786365: step: 148/77, loss: 0.0343783013522625 2023-01-22 08:19:11.086946: step: 152/77, loss: 0.0012751361355185509 2023-01-22 08:19:12.450053: step: 156/77, loss: 0.028205927461385727 2023-01-22 08:19:13.764417: step: 160/77, loss: 0.0010938920313492417 2023-01-22 08:19:15.110516: step: 164/77, loss: 0.00888506043702364 2023-01-22 08:19:16.370333: step: 168/77, loss: 0.0007671800558455288 2023-01-22 08:19:17.672467: step: 172/77, loss: 0.018024412915110588 2023-01-22 08:19:18.951584: step: 176/77, loss: 0.00041163599235005677 2023-01-22 08:19:20.250210: step: 180/77, loss: 0.028581075370311737 2023-01-22 08:19:21.533976: step: 184/77, loss: 0.007999785244464874 2023-01-22 08:19:22.820679: step: 188/77, loss: 0.0047058360651135445 2023-01-22 08:19:24.118552: step: 192/77, loss: 0.007076134905219078 2023-01-22 08:19:25.438607: step: 196/77, loss: 0.021052701398730278 2023-01-22 08:19:26.794750: step: 200/77, loss: 0.09484566748142242 2023-01-22 08:19:28.140582: step: 204/77, loss: 0.0005660290480591357 2023-01-22 08:19:29.440546: step: 208/77, loss: 0.02472056820988655 2023-01-22 08:19:30.730353: step: 212/77, loss: 0.006819822359830141 2023-01-22 
08:19:32.048606: step: 216/77, loss: 0.0009203726658597589 2023-01-22 08:19:33.366408: step: 220/77, loss: 0.001978323794901371 2023-01-22 08:19:34.680429: step: 224/77, loss: 0.0022479835897684097 2023-01-22 08:19:35.976425: step: 228/77, loss: 0.0005001539830118418 2023-01-22 08:19:37.339053: step: 232/77, loss: 0.0036140254233032465 2023-01-22 08:19:38.590431: step: 236/77, loss: 0.018563125282526016 2023-01-22 08:19:39.894668: step: 240/77, loss: 0.00011719368922058493 2023-01-22 08:19:41.136902: step: 244/77, loss: 0.0018171144183725119 2023-01-22 08:19:42.431597: step: 248/77, loss: 3.602403012337163e-05 2023-01-22 08:19:43.750375: step: 252/77, loss: 2.7164055609318893e-06 2023-01-22 08:19:45.012344: step: 256/77, loss: 0.0027795173227787018 2023-01-22 08:19:46.300619: step: 260/77, loss: 0.05980942025780678 2023-01-22 08:19:47.610225: step: 264/77, loss: 0.002729016589000821 2023-01-22 08:19:48.981650: step: 268/77, loss: 0.0014211706584319472 2023-01-22 08:19:50.292594: step: 272/77, loss: 0.0069159846752882 2023-01-22 08:19:51.625952: step: 276/77, loss: 2.6452304155100137e-05 2023-01-22 08:19:52.925728: step: 280/77, loss: 0.0017069733003154397 2023-01-22 08:19:54.281874: step: 284/77, loss: 0.05138189718127251 2023-01-22 08:19:55.525494: step: 288/77, loss: 0.007211971562355757 2023-01-22 08:19:56.767384: step: 292/77, loss: 0.001633265521377325 2023-01-22 08:19:58.037902: step: 296/77, loss: 0.037424977868795395 2023-01-22 08:19:59.348440: step: 300/77, loss: 0.001969374716281891 2023-01-22 08:20:00.668275: step: 304/77, loss: 0.0008761576609686017 2023-01-22 08:20:01.944017: step: 308/77, loss: 0.0007793945842422545 2023-01-22 08:20:03.303840: step: 312/77, loss: 0.018580691888928413 2023-01-22 08:20:04.554767: step: 316/77, loss: 0.0016993844183161855 2023-01-22 08:20:05.798455: step: 320/77, loss: 0.0018329013837501407 2023-01-22 08:20:07.101118: step: 324/77, loss: 0.0004931208095513284 2023-01-22 08:20:08.446147: step: 328/77, loss: 8.924649591790512e-05 2023-01-22 08:20:09.734972: step: 332/77, loss: 0.03662414103746414 2023-01-22 08:20:11.066835: step: 336/77, loss: 0.025226496160030365 2023-01-22 08:20:12.304728: step: 340/77, loss: 0.013443954288959503 2023-01-22 08:20:13.618515: step: 344/77, loss: 0.04253612831234932 2023-01-22 08:20:14.937742: step: 348/77, loss: 0.008895068429410458 2023-01-22 08:20:16.287886: step: 352/77, loss: 0.008363965898752213 2023-01-22 08:20:17.585015: step: 356/77, loss: 0.0023347344249486923 2023-01-22 08:20:18.896056: step: 360/77, loss: 0.0045925406739115715 2023-01-22 08:20:20.182279: step: 364/77, loss: 0.00017636397387832403 2023-01-22 08:20:21.515498: step: 368/77, loss: 0.009718219749629498 2023-01-22 08:20:22.853027: step: 372/77, loss: 0.009809928946197033 2023-01-22 08:20:24.150613: step: 376/77, loss: 4.1718150896485895e-05 2023-01-22 08:20:25.445104: step: 380/77, loss: 0.034828729927539825 2023-01-22 08:20:26.772549: step: 384/77, loss: 0.005780613049864769 2023-01-22 08:20:28.046938: step: 388/77, loss: 0.0001409717951901257 ================================================== Loss: 0.012 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Chinese: {'template': {'p': 0.9696969696969697, 'r': 0.5079365079365079, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.01804123711340206, 'f1': 0.03499999999999999}, 'combined': 
0.023333333333333324, 'epoch': 12} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Korean: {'template': {'p': 0.9696969696969697, 'r': 0.5079365079365079, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.01804123711340206, 'f1': 0.03499999999999999}, 'combined': 0.023333333333333324, 'epoch': 12} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 12} Test Russian: {'template': {'p': 0.9701492537313433, 'r': 0.5158730158730159, 'f1': 0.6735751295336788}, 'slot': {'p': 0.5675675675675675, 'r': 0.01804123711340206, 'f1': 0.03497085761865112}, 'combined': 0.023555499950386766, 'epoch': 12} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 12} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 12} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 12} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 13 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 
--xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:22:08.902582: step: 4/77, loss: 0.004952570889145136 2023-01-22 08:22:10.213939: step: 8/77, loss: 0.00018064001051243395 2023-01-22 08:22:11.546886: step: 12/77, loss: 0.08218786865472794 2023-01-22 08:22:12.825387: step: 16/77, loss: 0.025318488478660583 2023-01-22 08:22:14.176350: step: 20/77, loss: 0.0022081886418163776 2023-01-22 08:22:15.504282: step: 24/77, loss: 0.017943235114216805 2023-01-22 08:22:16.793007: step: 28/77, loss: 8.053469355218112e-05 2023-01-22 08:22:18.121609: step: 32/77, loss: 0.00113627128303051 2023-01-22 08:22:19.435317: step: 36/77, loss: 4.863796493737027e-05 2023-01-22 08:22:20.729348: step: 40/77, loss: 0.013745257630944252 2023-01-22 08:22:22.008525: step: 44/77, loss: 5.027527367928997e-05 2023-01-22 08:22:23.266021: step: 48/77, loss: 0.02231896109879017 2023-01-22 08:22:24.536892: step: 52/77, loss: 0.005378996953368187 2023-01-22 08:22:25.830724: step: 56/77, loss: 0.1697475165128708 2023-01-22 08:22:27.122300: step: 60/77, loss: 0.0009837471880018711 2023-01-22 08:22:28.402454: step: 64/77, loss: 0.000497913861181587 2023-01-22 08:22:29.734340: step: 68/77, loss: 0.0005649970844388008 2023-01-22 08:22:31.047444: step: 72/77, loss: 0.005117279943078756 2023-01-22 08:22:32.356197: step: 76/77, loss: 0.0029834150336682796 2023-01-22 08:22:33.658718: step: 80/77, loss: 0.0036763963289558887 2023-01-22 08:22:34.952617: step: 84/77, loss: 0.002525686053559184 2023-01-22 08:22:36.246647: step: 88/77, loss: 0.030388498678803444 2023-01-22 08:22:37.506028: step: 92/77, loss: 0.04918051138520241 2023-01-22 08:22:38.774281: step: 96/77, loss: 0.01887078583240509 2023-01-22 08:22:40.078098: step: 100/77, loss: 0.005926585290580988 2023-01-22 08:22:41.430296: step: 104/77, loss: 0.004645318258553743 2023-01-22 08:22:42.734684: step: 108/77, loss: 0.008219257928431034 2023-01-22 08:22:44.029676: step: 112/77, loss: 0.00023844148381613195 2023-01-22 08:22:45.358833: step: 116/77, loss: 0.0098537253215909 2023-01-22 08:22:46.622557: step: 120/77, loss: 0.00490536680445075 2023-01-22 08:22:47.917024: step: 124/77, loss: 0.0024474281817674637 2023-01-22 08:22:49.238508: step: 128/77, loss: 0.014606594108045101 2023-01-22 08:22:50.551282: step: 132/77, loss: 0.00835402775555849 2023-01-22 08:22:51.858052: step: 136/77, loss: 0.3284963369369507 2023-01-22 08:22:53.219978: step: 140/77, loss: 0.003163279267027974 2023-01-22 08:22:54.563420: step: 144/77, loss: 0.0027168530505150557 2023-01-22 08:22:55.851633: step: 148/77, loss: 0.001842327183112502 2023-01-22 08:22:57.116588: step: 152/77, loss: 0.002845626324415207 2023-01-22 08:22:58.379070: step: 156/77, loss: 0.0007997804787009954 2023-01-22 08:22:59.680503: step: 160/77, loss: 0.017090700566768646 2023-01-22 08:23:00.954054: step: 164/77, loss: 0.003446019720286131 2023-01-22 08:23:02.283933: step: 168/77, loss: 0.05163341388106346 2023-01-22 08:23:03.566435: step: 172/77, loss: 0.0038207899779081345 2023-01-22 08:23:04.864205: step: 176/77, loss: 0.06080837547779083 2023-01-22 08:23:06.160245: step: 180/77, loss: 0.03154751658439636 2023-01-22 08:23:07.509737: step: 184/77, loss: 8.086367597570643e-05 2023-01-22 08:23:08.821031: step: 188/77, loss: 2.9138493118807673e-05 2023-01-22 08:23:10.104412: step: 192/77, loss: 0.022798681631684303 2023-01-22 08:23:11.438950: step: 196/77, loss: 0.02658800594508648 2023-01-22 08:23:12.723555: step: 200/77, loss: 0.05497226119041443 
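Note: each entry in this stream follows the pattern "<timestamp>: step: <n>/77, loss: <value>", and the "Loss:" figure printed at the end of every epoch is consistent with an average over these per-step losses. A minimal parsing sketch for pulling them out of a chunk of the log (an annotation added here, not part of train.py; the file name train.log is a placeholder):

import re
from statistics import mean

# Matches entries like "2023-01-22 08:23:12.723555: step: 200/77, loss: 0.05497226119041443",
# including losses written in scientific notation.
STEP_RE = re.compile(r"step: (\d+)/\d+, loss: ([0-9.eE+-]+)")

def step_losses(log_chunk: str) -> list[float]:
    """Collect every per-step loss value found in a chunk of the log."""
    return [float(m.group(2)) for m in STEP_RE.finditer(log_chunk)]

losses = step_losses(open("train.log").read())               # placeholder path
print(f"{len(losses)} steps, mean loss {mean(losses):.3f}")  # compare against the epoch "Loss:" line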
2023-01-22 08:23:14.036534: step: 204/77, loss: 0.0023539061658084393 2023-01-22 08:23:15.372093: step: 208/77, loss: 0.11826759576797485 2023-01-22 08:23:16.602705: step: 212/77, loss: 0.0033361660316586494 2023-01-22 08:23:17.883948: step: 216/77, loss: 0.004443520680069923 2023-01-22 08:23:19.176740: step: 220/77, loss: 0.03105931170284748 2023-01-22 08:23:20.483063: step: 224/77, loss: 0.011646389029920101 2023-01-22 08:23:21.740222: step: 228/77, loss: 0.026408933103084564 2023-01-22 08:23:23.084701: step: 232/77, loss: 0.0027887923642992973 2023-01-22 08:23:24.388756: step: 236/77, loss: 0.005191397853195667 2023-01-22 08:23:25.682834: step: 240/77, loss: 0.0014639300061389804 2023-01-22 08:23:26.950284: step: 244/77, loss: 0.04769325628876686 2023-01-22 08:23:28.243409: step: 248/77, loss: 0.0005501174600794911 2023-01-22 08:23:29.575586: step: 252/77, loss: 0.03677041456103325 2023-01-22 08:23:30.870398: step: 256/77, loss: 0.023332320153713226 2023-01-22 08:23:32.186660: step: 260/77, loss: 0.001704951049759984 2023-01-22 08:23:33.491981: step: 264/77, loss: 0.0013226951705291867 2023-01-22 08:23:34.843220: step: 268/77, loss: 0.0008604861213825643 2023-01-22 08:23:36.170993: step: 272/77, loss: 0.0003232818271499127 2023-01-22 08:23:37.471502: step: 276/77, loss: 0.00019440895994193852 2023-01-22 08:23:38.811764: step: 280/77, loss: 0.0014171466464176774 2023-01-22 08:23:40.086491: step: 284/77, loss: 0.0032911188900470734 2023-01-22 08:23:41.426477: step: 288/77, loss: 5.382840390666388e-05 2023-01-22 08:23:42.774634: step: 292/77, loss: 0.000356964796083048 2023-01-22 08:23:44.125369: step: 296/77, loss: 0.003755199024453759 2023-01-22 08:23:45.419700: step: 300/77, loss: 0.015594424679875374 2023-01-22 08:23:46.723209: step: 304/77, loss: 0.025219272822141647 2023-01-22 08:23:48.016432: step: 308/77, loss: 0.0034283148124814034 2023-01-22 08:23:49.330722: step: 312/77, loss: 0.0035809995606541634 2023-01-22 08:23:50.626830: step: 316/77, loss: 0.0005085446173325181 2023-01-22 08:23:51.975860: step: 320/77, loss: 0.0023056501522660255 2023-01-22 08:23:53.259893: step: 324/77, loss: 0.017061032354831696 2023-01-22 08:23:54.530385: step: 328/77, loss: 0.0009639563504606485 2023-01-22 08:23:55.834141: step: 332/77, loss: 0.04023989662528038 2023-01-22 08:23:57.118323: step: 336/77, loss: 1.9297449398436584e-05 2023-01-22 08:23:58.363737: step: 340/77, loss: 0.027431517839431763 2023-01-22 08:23:59.662875: step: 344/77, loss: 0.009087876416742802 2023-01-22 08:24:00.943777: step: 348/77, loss: 0.00010213730274699628 2023-01-22 08:24:02.291875: step: 352/77, loss: 0.0021237514447420835 2023-01-22 08:24:03.619619: step: 356/77, loss: 0.02595115266740322 2023-01-22 08:24:04.979725: step: 360/77, loss: 0.033148620277643204 2023-01-22 08:24:06.280469: step: 364/77, loss: 2.347496774746105e-05 2023-01-22 08:24:07.582529: step: 368/77, loss: 0.0004097193304914981 2023-01-22 08:24:08.879756: step: 372/77, loss: 0.04141872003674507 2023-01-22 08:24:10.192312: step: 376/77, loss: 0.0002173631073674187 2023-01-22 08:24:11.537779: step: 380/77, loss: 0.0032450349535793066 2023-01-22 08:24:12.864078: step: 384/77, loss: 0.0003417402331251651 2023-01-22 08:24:14.213027: step: 388/77, loss: 1.6842212062329054e-05 ================================================== Loss: 0.018 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 
13} Test Chinese: {'template': {'p': 0.9701492537313433, 'r': 0.5158730158730159, 'f1': 0.6735751295336788}, 'slot': {'p': 0.6, 'r': 0.01288659793814433, 'f1': 0.025231286795626577}, 'combined': 0.01699516727166557, 'epoch': 13} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 13} Test Korean: {'template': {'p': 0.9696969696969697, 'r': 0.5079365079365079, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.01288659793814433, 'f1': 0.025231286795626577}, 'combined': 0.01682085786375105, 'epoch': 13} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 13} Test Russian: {'template': {'p': 0.984375, 'r': 0.5, 'f1': 0.6631578947368421}, 'slot': {'p': 0.5925925925925926, 'r': 0.013745704467353952, 'f1': 0.026868178001679264}, 'combined': 0.017817844359008354, 'epoch': 13} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 13} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 13} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 14 command: python 
train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:25:54.497065: step: 4/77, loss: 0.0002736255992203951 2023-01-22 08:25:55.779255: step: 8/77, loss: 0.0004504866083152592 2023-01-22 08:25:57.062010: step: 12/77, loss: 0.00023320181935559958 2023-01-22 08:25:58.339752: step: 16/77, loss: 0.049800168722867966 2023-01-22 08:25:59.681039: step: 20/77, loss: 0.03142474219202995 2023-01-22 08:26:00.999686: step: 24/77, loss: 0.00851297378540039 2023-01-22 08:26:02.314268: step: 28/77, loss: 0.00548419076949358 2023-01-22 08:26:03.630653: step: 32/77, loss: 5.586273800872732e-06 2023-01-22 08:26:04.888395: step: 36/77, loss: 0.0012668132549151778 2023-01-22 08:26:06.172446: step: 40/77, loss: 0.08734573423862457 2023-01-22 08:26:07.476452: step: 44/77, loss: 0.00010684480366762727 2023-01-22 08:26:08.785241: step: 48/77, loss: 0.0009850130882114172 2023-01-22 08:26:10.105634: step: 52/77, loss: 0.034376420080661774 2023-01-22 08:26:11.391117: step: 56/77, loss: 0.0007986004929989576 2023-01-22 08:26:12.699741: step: 60/77, loss: 0.003285888582468033 2023-01-22 08:26:13.995673: step: 64/77, loss: 9.500126907369122e-05 2023-01-22 08:26:15.328791: step: 68/77, loss: 0.040808241814374924 2023-01-22 08:26:16.632127: step: 72/77, loss: 0.0010058830957859755 2023-01-22 08:26:17.903539: step: 76/77, loss: 0.019482817500829697 2023-01-22 08:26:19.200888: step: 80/77, loss: 0.004581984132528305 2023-01-22 08:26:20.480706: step: 84/77, loss: 0.008562111295759678 2023-01-22 08:26:21.752213: step: 88/77, loss: 0.0005003288970328867 2023-01-22 08:26:23.085949: step: 92/77, loss: 0.013277137652039528 2023-01-22 08:26:24.339380: step: 96/77, loss: 0.0015275046462193131 2023-01-22 08:26:25.640139: step: 100/77, loss: 0.04671543464064598 2023-01-22 08:26:26.935199: step: 104/77, loss: 0.0071679409593343735 2023-01-22 08:26:28.235135: step: 108/77, loss: 0.022933460772037506 2023-01-22 08:26:29.543927: step: 112/77, loss: 0.02628178894519806 2023-01-22 08:26:30.844674: step: 116/77, loss: 0.00014312085113488138 2023-01-22 08:26:32.126469: step: 120/77, loss: 0.020610585808753967 2023-01-22 08:26:33.413619: step: 124/77, loss: 0.01356650423258543 2023-01-22 08:26:34.665641: step: 128/77, loss: 0.0012072846293449402 2023-01-22 08:26:35.969565: step: 132/77, loss: 0.025142788887023926 2023-01-22 08:26:37.254646: step: 136/77, loss: 0.055683743208646774 2023-01-22 08:26:38.546048: step: 140/77, loss: 0.0024060329888015985 2023-01-22 08:26:39.857679: step: 144/77, loss: 3.852570807794109e-05 2023-01-22 08:26:41.117847: step: 148/77, loss: 0.0007876998279243708 2023-01-22 08:26:42.438918: step: 152/77, loss: 7.95004962128587e-05 2023-01-22 08:26:43.703020: step: 156/77, loss: 0.03436954692006111 2023-01-22 08:26:44.997794: step: 160/77, loss: 0.02941078506410122 2023-01-22 08:26:46.287815: step: 164/77, loss: 0.0010085589019581676 2023-01-22 08:26:47.592797: step: 168/77, loss: 0.047466427087783813 2023-01-22 08:26:48.875446: step: 172/77, loss: 0.005795814096927643 2023-01-22 08:26:50.212027: step: 176/77, loss: 9.644380770623684e-05 2023-01-22 08:26:51.511297: step: 180/77, loss: 0.006475800182670355 2023-01-22 08:26:52.759539: step: 184/77, loss: 0.0005885373684577644 2023-01-22 08:26:54.083820: step: 188/77, loss: 0.01506776362657547 2023-01-22 08:26:55.388004: step: 192/77, loss: 0.05868987739086151 2023-01-22 08:26:56.674525: step: 196/77, 
loss: 0.019701024517416954 2023-01-22 08:26:58.027609: step: 200/77, loss: 0.00040737222298048437 2023-01-22 08:26:59.328941: step: 204/77, loss: 0.0006639646599069238 2023-01-22 08:27:00.697922: step: 208/77, loss: 0.00018083356553688645 2023-01-22 08:27:02.042752: step: 212/77, loss: 0.059999510645866394 2023-01-22 08:27:03.319770: step: 216/77, loss: 0.0008782768272794783 2023-01-22 08:27:04.583334: step: 220/77, loss: 0.020827846601605415 2023-01-22 08:27:05.886208: step: 224/77, loss: 0.002391217742115259 2023-01-22 08:27:07.215608: step: 228/77, loss: 0.0037284065037965775 2023-01-22 08:27:08.509455: step: 232/77, loss: 0.02475815825164318 2023-01-22 08:27:09.837292: step: 236/77, loss: 6.199297058628872e-05 2023-01-22 08:27:11.159114: step: 240/77, loss: 0.029774367809295654 2023-01-22 08:27:12.475336: step: 244/77, loss: 0.002180825686082244 2023-01-22 08:27:13.753564: step: 248/77, loss: 0.0032102155964821577 2023-01-22 08:27:15.035104: step: 252/77, loss: 0.0031334550585597754 2023-01-22 08:27:16.280106: step: 256/77, loss: 6.322468107100576e-05 2023-01-22 08:27:17.573355: step: 260/77, loss: 0.025384191423654556 2023-01-22 08:27:18.862641: step: 264/77, loss: 0.0002646015491336584 2023-01-22 08:27:20.147161: step: 268/77, loss: 8.781399628787767e-06 2023-01-22 08:27:21.509271: step: 272/77, loss: 0.03080436773598194 2023-01-22 08:27:22.814034: step: 276/77, loss: 0.00026560225524008274 2023-01-22 08:27:24.174587: step: 280/77, loss: 0.005289306398481131 2023-01-22 08:27:25.483264: step: 284/77, loss: 0.0034095204900950193 2023-01-22 08:27:26.780163: step: 288/77, loss: 0.004925249610096216 2023-01-22 08:27:28.097574: step: 292/77, loss: 3.067413854296319e-05 2023-01-22 08:27:29.447321: step: 296/77, loss: 3.733620178536512e-05 2023-01-22 08:27:30.703320: step: 300/77, loss: 0.00012354001228231937 2023-01-22 08:27:32.042363: step: 304/77, loss: 0.004708366468548775 2023-01-22 08:27:33.365009: step: 308/77, loss: 0.009184081107378006 2023-01-22 08:27:34.717323: step: 312/77, loss: 0.032576557248830795 2023-01-22 08:27:36.031009: step: 316/77, loss: 0.04805753007531166 2023-01-22 08:27:37.332384: step: 320/77, loss: 0.000643449486233294 2023-01-22 08:27:38.646982: step: 324/77, loss: 0.00031785358441993594 2023-01-22 08:27:39.947440: step: 328/77, loss: 0.004113791044801474 2023-01-22 08:27:41.261574: step: 332/77, loss: 0.002717088907957077 2023-01-22 08:27:42.555845: step: 336/77, loss: 0.05559927225112915 2023-01-22 08:27:43.871175: step: 340/77, loss: 0.009818648919463158 2023-01-22 08:27:45.210050: step: 344/77, loss: 0.028968963772058487 2023-01-22 08:27:46.432344: step: 348/77, loss: 0.0009838235564529896 2023-01-22 08:27:47.793272: step: 352/77, loss: 0.0022649941965937614 2023-01-22 08:27:49.071895: step: 356/77, loss: 0.00011140467540826648 2023-01-22 08:27:50.362997: step: 360/77, loss: 0.006448336876928806 2023-01-22 08:27:51.661577: step: 364/77, loss: 0.0014627741184085608 2023-01-22 08:27:52.959015: step: 368/77, loss: 0.0010857736924663186 2023-01-22 08:27:54.279387: step: 372/77, loss: 0.013371062465012074 2023-01-22 08:27:55.558817: step: 376/77, loss: 0.0003074152336921543 2023-01-22 08:27:56.880229: step: 380/77, loss: 0.0006239335052669048 2023-01-22 08:27:58.166307: step: 384/77, loss: 0.027297526597976685 2023-01-22 08:27:59.478694: step: 388/77, loss: 1.0845969882211648e-05 ================================================== Loss: 0.013 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 
0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04247787610619469, 'epoch': 14} Test Chinese: {'template': {'p': 0.9857142857142858, 'r': 0.5476190476190477, 'f1': 0.7040816326530612}, 'slot': {'p': 0.5897435897435898, 'r': 0.019759450171821305, 'f1': 0.03823773898586866}, 'combined': 0.026922489694132013, 'epoch': 14} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04247787610619469, 'epoch': 14} Test Korean: {'template': {'p': 0.9857142857142858, 'r': 0.5476190476190477, 'f1': 0.7040816326530612}, 'slot': {'p': 0.5789473684210527, 'r': 0.018900343642611683, 'f1': 0.03660565723793677}, 'combined': 0.02577337091242487, 'epoch': 14} Dev Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04247787610619469, 'epoch': 14} Test Russian: {'template': {'p': 0.9857142857142858, 'r': 0.5476190476190477, 'f1': 0.7040816326530612}, 'slot': {'p': 0.5945945945945946, 'r': 0.018900343642611683, 'f1': 0.0366361365528726}, 'combined': 0.025794830838247036, 'epoch': 14} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 14} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 14} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': 
{'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 15 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:29:39.723550: step: 4/77, loss: 1.4878492038405966e-05 2023-01-22 08:29:41.040115: step: 8/77, loss: 4.9835511163109913e-05 2023-01-22 08:29:42.382558: step: 12/77, loss: 0.00017194103565998375 2023-01-22 08:29:43.657448: step: 16/77, loss: 0.0014435353223234415 2023-01-22 08:29:44.955670: step: 20/77, loss: 0.003427227959036827 2023-01-22 08:29:46.239597: step: 24/77, loss: 0.02076116017997265 2023-01-22 08:29:47.559385: step: 28/77, loss: 0.03030756488442421 2023-01-22 08:29:48.895556: step: 32/77, loss: 0.008024593815207481 2023-01-22 08:29:50.227714: step: 36/77, loss: 0.02782297693192959 2023-01-22 08:29:51.499402: step: 40/77, loss: 9.715352007333422e-07 2023-01-22 08:29:52.792600: step: 44/77, loss: 0.015568568371236324 2023-01-22 08:29:54.081057: step: 48/77, loss: 0.0001185145738418214 2023-01-22 08:29:55.338019: step: 52/77, loss: 0.03457537293434143 2023-01-22 08:29:56.585297: step: 56/77, loss: 0.015236059203743935 2023-01-22 08:29:57.878540: step: 60/77, loss: 0.0051247659139335155 2023-01-22 08:29:59.183014: step: 64/77, loss: 0.03789728879928589 2023-01-22 08:30:00.435790: step: 68/77, loss: 0.03683306649327278 2023-01-22 08:30:01.779379: step: 72/77, loss: 0.00116816780064255 2023-01-22 08:30:03.062214: step: 76/77, loss: 0.016729649156332016 2023-01-22 08:30:04.393171: step: 80/77, loss: 1.3653680071001872e-05 2023-01-22 08:30:05.689116: step: 84/77, loss: 0.006376555189490318 2023-01-22 08:30:07.045318: step: 88/77, loss: 0.0009691191953606904 2023-01-22 08:30:08.350173: step: 92/77, loss: 0.0029876306653022766 2023-01-22 08:30:09.660052: step: 96/77, loss: 0.042058952152729034 2023-01-22 08:30:10.982143: step: 100/77, loss: 8.360248466487974e-05 2023-01-22 08:30:12.301040: step: 104/77, loss: 0.0072425431571900845 2023-01-22 08:30:13.543582: step: 108/77, loss: 0.003880678676068783 2023-01-22 08:30:14.805711: step: 112/77, loss: 0.0008597972337156534 2023-01-22 08:30:16.135410: step: 116/77, loss: 0.0099770687520504 2023-01-22 08:30:17.421967: step: 120/77, loss: 0.010671457275748253 2023-01-22 08:30:18.738942: step: 124/77, loss: 0.00871829129755497 2023-01-22 08:30:20.042726: step: 128/77, loss: 3.0166500437189825e-05 2023-01-22 08:30:21.371966: step: 132/77, loss: 0.00016145638073794544 2023-01-22 08:30:22.688613: step: 136/77, loss: 3.830725290754344e-06 2023-01-22 08:30:24.018426: step: 140/77, loss: 0.0043679047375917435 2023-01-22 08:30:25.288251: step: 144/77, loss: 0.00938387494534254 2023-01-22 08:30:26.623294: step: 148/77, loss: 0.0004537670756690204 2023-01-22 08:30:27.936600: step: 152/77, loss: 7.381376235571224e-06 2023-01-22 08:30:29.236557: step: 156/77, loss: 0.0018163879867643118 2023-01-22 08:30:30.542120: step: 160/77, loss: 0.012446406297385693 2023-01-22 08:30:31.872626: step: 164/77, loss: 0.0003807510656770319 2023-01-22 08:30:33.143738: step: 168/77, loss: 0.005677470006048679 2023-01-22 08:30:34.417073: step: 172/77, loss: 0.0012833502842113376 2023-01-22 08:30:35.708665: step: 176/77, loss: 0.04827876389026642 2023-01-22 08:30:36.972240: step: 180/77, loss: 0.029316775500774384 2023-01-22 08:30:38.228334: step: 184/77, loss: 9.630203749111388e-06 2023-01-22 08:30:39.515656: step: 188/77, loss: 
1.1573029951250646e-05 2023-01-22 08:30:40.863697: step: 192/77, loss: 0.0010241689160466194 2023-01-22 08:30:42.172430: step: 196/77, loss: 0.00017882336396723986 2023-01-22 08:30:43.499640: step: 200/77, loss: 0.004142004065215588 2023-01-22 08:30:44.853234: step: 204/77, loss: 7.169664604589343e-05 2023-01-22 08:30:46.122691: step: 208/77, loss: 0.0007131965248845518 2023-01-22 08:30:47.418621: step: 212/77, loss: 0.002486684825271368 2023-01-22 08:30:48.704006: step: 216/77, loss: 0.032599691301584244 2023-01-22 08:30:50.033866: step: 220/77, loss: 0.0008860656525939703 2023-01-22 08:30:51.417612: step: 224/77, loss: 0.11878086626529694 2023-01-22 08:30:52.764715: step: 228/77, loss: 0.0019146227277815342 2023-01-22 08:30:54.085872: step: 232/77, loss: 0.00997019000351429 2023-01-22 08:30:55.335775: step: 236/77, loss: 0.0006213907618075609 2023-01-22 08:30:56.634005: step: 240/77, loss: 7.765216287225485e-05 2023-01-22 08:30:57.966816: step: 244/77, loss: 0.011932688765227795 2023-01-22 08:30:59.297015: step: 248/77, loss: 1.8564012862043455e-05 2023-01-22 08:31:00.595307: step: 252/77, loss: 0.020145785063505173 2023-01-22 08:31:01.882302: step: 256/77, loss: 0.008753478527069092 2023-01-22 08:31:03.243915: step: 260/77, loss: 0.0072768256068229675 2023-01-22 08:31:04.564750: step: 264/77, loss: 0.03789191693067551 2023-01-22 08:31:05.914175: step: 268/77, loss: 0.0030572679825127125 2023-01-22 08:31:07.226977: step: 272/77, loss: 0.00020431546727195382 2023-01-22 08:31:08.576371: step: 276/77, loss: 0.0017906144494190812 2023-01-22 08:31:09.862192: step: 280/77, loss: 5.944917575106956e-05 2023-01-22 08:31:11.190058: step: 284/77, loss: 0.028044767677783966 2023-01-22 08:31:12.516084: step: 288/77, loss: 0.0019235184881836176 2023-01-22 08:31:13.795690: step: 292/77, loss: 1.2465928193705622e-05 2023-01-22 08:31:15.057572: step: 296/77, loss: 0.005761981941759586 2023-01-22 08:31:16.377797: step: 300/77, loss: 0.00087132235057652 2023-01-22 08:31:17.654293: step: 304/77, loss: 0.007014409638941288 2023-01-22 08:31:18.970925: step: 308/77, loss: 0.0032073655165731907 2023-01-22 08:31:20.270409: step: 312/77, loss: 0.03255422040820122 2023-01-22 08:31:21.551801: step: 316/77, loss: 0.06079493835568428 2023-01-22 08:31:22.848430: step: 320/77, loss: 0.0003763463464565575 2023-01-22 08:31:24.165704: step: 324/77, loss: 0.032440174371004105 2023-01-22 08:31:25.487007: step: 328/77, loss: 0.0023321935441344976 2023-01-22 08:31:26.814437: step: 332/77, loss: 5.185348345548846e-05 2023-01-22 08:31:28.129660: step: 336/77, loss: 8.398528734687716e-05 2023-01-22 08:31:29.446009: step: 340/77, loss: 0.033188119530677795 2023-01-22 08:31:30.779950: step: 344/77, loss: 0.0655626654624939 2023-01-22 08:31:32.099564: step: 348/77, loss: 0.08891092240810394 2023-01-22 08:31:33.392457: step: 352/77, loss: 0.008576600812375546 2023-01-22 08:31:34.648447: step: 356/77, loss: 0.0025107869878411293 2023-01-22 08:31:35.943419: step: 360/77, loss: 0.00036133453249931335 2023-01-22 08:31:37.257555: step: 364/77, loss: 0.018196951597929 2023-01-22 08:31:38.535424: step: 368/77, loss: 0.009679058566689491 2023-01-22 08:31:39.843524: step: 372/77, loss: 0.002200314775109291 2023-01-22 08:31:41.191274: step: 376/77, loss: 0.010349934920668602 2023-01-22 08:31:42.571914: step: 380/77, loss: 5.733857324230485e-05 2023-01-22 08:31:43.896911: step: 384/77, loss: 0.0001564957929076627 2023-01-22 08:31:45.189586: step: 388/77, loss: 0.0032057648058980703 ================================================== Loss: 0.012 
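Note: the evaluation blocks that follow (and those for the earlier epochs) report, per language, a template score and a slot score as precision p, recall r, and F1, plus a "combined" value. The logged numbers are consistent with the standard F1 formula and with combined = template_f1 * slot_f1; for instance, the epoch-12 "Dev Chinese" entry above reproduces exactly under that reading. A quick check (annotation only, not code from this repository):

def f1(p: float, r: float) -> float:
    """Harmonic mean of precision and recall."""
    return 0.0 if p + r == 0 else 2 * p * r / (p + r)

# Values copied from the epoch-12 "Dev Chinese" entry above.
template_f1 = f1(1.0, 0.5833333333333334)    # -> 0.7368421052631579
slot_f1 = f1(0.5, 0.03780718336483932)       # -> 0.07029876977152899
print(template_f1 * slot_f1)                 # -> 0.05179909351586346, the logged "combined"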
-------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 15} Test Chinese: {'template': {'p': 0.9705882352941176, 'r': 0.5238095238095238, 'f1': 0.6804123711340205}, 'slot': {'p': 0.5666666666666667, 'r': 0.014604810996563574, 'f1': 0.028475711892797323}, 'combined': 0.019375226648707455, 'epoch': 15} Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 15} Test Korean: {'template': {'p': 0.9705882352941176, 'r': 0.5238095238095238, 'f1': 0.6804123711340205}, 'slot': {'p': 0.6, 'r': 0.015463917525773196, 'f1': 0.03015075376884422}, 'combined': 0.0205149458633373, 'epoch': 15} Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 15} Test Russian: {'template': {'p': 0.9705882352941176, 'r': 0.5238095238095238, 'f1': 0.6804123711340205}, 'slot': {'p': 0.5862068965517241, 'r': 0.014604810996563574, 'f1': 0.02849958088851635}, 'combined': 0.019391467408681227, 'epoch': 15} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 15} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 15} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 15} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 
'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 16 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:33:25.683697: step: 4/77, loss: 0.0007765126065351069 2023-01-22 08:33:26.978397: step: 8/77, loss: 0.00012015578977297992 2023-01-22 08:33:28.277758: step: 12/77, loss: 0.008260026574134827 2023-01-22 08:33:29.591738: step: 16/77, loss: 0.00024017225950956345 2023-01-22 08:33:30.869210: step: 20/77, loss: 0.003737919731065631 2023-01-22 08:33:32.146460: step: 24/77, loss: 0.004641602281481028 2023-01-22 08:33:33.449071: step: 28/77, loss: 0.0012632752768695354 2023-01-22 08:33:34.766954: step: 32/77, loss: 0.0055075702257454395 2023-01-22 08:33:36.071263: step: 36/77, loss: 0.030879627913236618 2023-01-22 08:33:37.363904: step: 40/77, loss: 0.004773629363626242 2023-01-22 08:33:38.645442: step: 44/77, loss: 0.02421536110341549 2023-01-22 08:33:39.923745: step: 48/77, loss: 0.0026690459344536066 2023-01-22 08:33:41.190324: step: 52/77, loss: 0.001229959074407816 2023-01-22 08:33:42.472587: step: 56/77, loss: 7.954640022944659e-05 2023-01-22 08:33:43.790334: step: 60/77, loss: 0.00868784636259079 2023-01-22 08:33:45.099851: step: 64/77, loss: 0.003013339824974537 2023-01-22 08:33:46.394536: step: 68/77, loss: 0.00032769900280982256 2023-01-22 08:33:47.677226: step: 72/77, loss: 0.0007191248587332666 2023-01-22 08:33:48.971145: step: 76/77, loss: 7.872861169744283e-05 2023-01-22 08:33:50.261934: step: 80/77, loss: 0.020206786692142487 2023-01-22 08:33:51.539903: step: 84/77, loss: 0.016133133322000504 2023-01-22 08:33:52.831232: step: 88/77, loss: 0.00040447746869176626 2023-01-22 08:33:54.157705: step: 92/77, loss: 0.02237563207745552 2023-01-22 08:33:55.428907: step: 96/77, loss: 0.002664468716830015 2023-01-22 08:33:56.740208: step: 100/77, loss: 0.0001514804025646299 2023-01-22 08:33:58.003367: step: 104/77, loss: 6.90346205374226e-05 2023-01-22 08:33:59.305628: step: 108/77, loss: 0.0008093639044091105 2023-01-22 08:34:00.569681: step: 112/77, loss: 0.000921435363125056 2023-01-22 08:34:01.942774: step: 116/77, loss: 0.006500033661723137 2023-01-22 08:34:03.284125: step: 120/77, loss: 4.7151137550827116e-05 2023-01-22 08:34:04.608674: step: 124/77, loss: 0.0004083859676029533 2023-01-22 08:34:05.898232: step: 128/77, loss: 0.07341621071100235 2023-01-22 08:34:07.239346: step: 132/77, loss: 6.460564327426255e-05 2023-01-22 08:34:08.510532: step: 136/77, loss: 0.008727012202143669 2023-01-22 08:34:09.747948: step: 140/77, loss: 0.03657951205968857 2023-01-22 08:34:11.023430: step: 144/77, loss: 0.00010166480933548883 2023-01-22 08:34:12.341668: step: 148/77, loss: 6.464680154749658e-06 2023-01-22 08:34:13.626287: step: 152/77, loss: 0.00010319374996470287 2023-01-22 08:34:14.891519: step: 156/77, loss: 0.03454245626926422 2023-01-22 08:34:16.197044: step: 160/77, loss: 0.005117420572787523 2023-01-22 08:34:17.484859: step: 164/77, loss: 0.006901135668158531 2023-01-22 08:34:18.772850: step: 168/77, loss: 0.000240602734265849 2023-01-22 08:34:20.093478: step: 172/77, loss: 0.1016627624630928 2023-01-22 08:34:21.388744: step: 176/77, loss: 
0.00012070147931808606 2023-01-22 08:34:22.700568: step: 180/77, loss: 4.286599505576305e-05 2023-01-22 08:34:23.957298: step: 184/77, loss: 0.00014440326776821166 2023-01-22 08:34:25.239736: step: 188/77, loss: 0.0006260947557166219 2023-01-22 08:34:26.546463: step: 192/77, loss: 0.0010012636194005609 2023-01-22 08:34:27.847020: step: 196/77, loss: 0.002178141148760915 2023-01-22 08:34:29.120337: step: 200/77, loss: 0.05275914445519447 2023-01-22 08:34:30.462454: step: 204/77, loss: 0.03453891724348068 2023-01-22 08:34:31.761443: step: 208/77, loss: 0.001776168355718255 2023-01-22 08:34:33.122319: step: 212/77, loss: 2.2828473447589204e-05 2023-01-22 08:34:34.408348: step: 216/77, loss: 0.06948499381542206 2023-01-22 08:34:35.696341: step: 220/77, loss: 0.015457428991794586 2023-01-22 08:34:36.981048: step: 224/77, loss: 0.026023317128419876 2023-01-22 08:34:38.278626: step: 228/77, loss: 0.042440950870513916 2023-01-22 08:34:39.588482: step: 232/77, loss: 8.644620538689196e-05 2023-01-22 08:34:40.946304: step: 236/77, loss: 0.019920025020837784 2023-01-22 08:34:42.311808: step: 240/77, loss: 0.0416274294257164 2023-01-22 08:34:43.606127: step: 244/77, loss: 3.288514926680364e-05 2023-01-22 08:34:44.944151: step: 248/77, loss: 0.0008118004188872874 2023-01-22 08:34:46.210119: step: 252/77, loss: 0.0020588578190654516 2023-01-22 08:34:47.531626: step: 256/77, loss: 0.0016918214969336987 2023-01-22 08:34:48.820080: step: 260/77, loss: 0.004299009684473276 2023-01-22 08:34:50.091610: step: 264/77, loss: 0.0018904039170593023 2023-01-22 08:34:51.412401: step: 268/77, loss: 0.003573576221242547 2023-01-22 08:34:52.704339: step: 272/77, loss: 0.0013117672642692924 2023-01-22 08:34:54.002872: step: 276/77, loss: 0.0018177537713199854 2023-01-22 08:34:55.340009: step: 280/77, loss: 0.0013126140693202615 2023-01-22 08:34:56.661921: step: 284/77, loss: 0.003976478241384029 2023-01-22 08:34:57.972001: step: 288/77, loss: 0.01663769781589508 2023-01-22 08:34:59.253669: step: 292/77, loss: 0.10017237067222595 2023-01-22 08:35:00.541786: step: 296/77, loss: 0.0033373809419572353 2023-01-22 08:35:01.838050: step: 300/77, loss: 0.002248018980026245 2023-01-22 08:35:03.160027: step: 304/77, loss: 0.10106675326824188 2023-01-22 08:35:04.426612: step: 308/77, loss: 0.0033508699852973223 2023-01-22 08:35:05.754946: step: 312/77, loss: 0.0002143883320968598 2023-01-22 08:35:07.039819: step: 316/77, loss: 0.020443376153707504 2023-01-22 08:35:08.341303: step: 320/77, loss: 0.00360716856084764 2023-01-22 08:35:09.622018: step: 324/77, loss: 0.0013300712453201413 2023-01-22 08:35:10.917194: step: 328/77, loss: 0.01074330136179924 2023-01-22 08:35:12.253505: step: 332/77, loss: 0.0008238847367465496 2023-01-22 08:35:13.549945: step: 336/77, loss: 0.005398381967097521 2023-01-22 08:35:14.828397: step: 340/77, loss: 2.7465936000226066e-05 2023-01-22 08:35:16.051547: step: 344/77, loss: 0.01161793153733015 2023-01-22 08:35:17.353771: step: 348/77, loss: 0.005161845590919256 2023-01-22 08:35:18.669339: step: 352/77, loss: 7.165823626564816e-05 2023-01-22 08:35:20.017690: step: 356/77, loss: 1.2400157174852211e-05 2023-01-22 08:35:21.348185: step: 360/77, loss: 0.0018668932607397437 2023-01-22 08:35:22.702794: step: 364/77, loss: 0.017863446846604347 2023-01-22 08:35:24.074002: step: 368/77, loss: 0.03986204415559769 2023-01-22 08:35:25.418135: step: 372/77, loss: 0.05790456384420395 2023-01-22 08:35:26.775658: step: 376/77, loss: 0.011296149343252182 2023-01-22 08:35:28.108282: step: 380/77, loss: 0.08306736499071121 
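Note: the step counter in these entries advances by 4 (4, 8, ..., 388), which lines up with the --accumulate_step 4 flag in the command line: a plausible reading is that one loss is printed per optimizer update, after gradients from four micro-batches of --batch_size 10 have been accumulated. A generic, self-contained sketch of that pattern in PyTorch (explicitly not the repository's train.py; the linear model and random data are stand-ins):

import torch
from torch import nn

model = nn.Linear(8, 1)                                   # stand-in for the real model
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-4)
accumulate_step, batch_size = 4, 10

for step in range(1, 389):                                # 388 logged micro-steps per epoch
    x, y = torch.randn(batch_size, 8), torch.randn(batch_size, 1)
    loss = nn.functional.mse_loss(model(x), y)
    (loss / accumulate_step).backward()                   # scale so accumulated grads average out
    if step % accumulate_step == 0:                       # one parameter update every 4 micro-batches
        optimizer.step()
        optimizer.zero_grad()
        print(f"step: {step}, loss: {loss.item()}")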
2023-01-22 08:35:29.394770: step: 384/77, loss: 0.0009478795109316707 2023-01-22 08:35:30.654197: step: 388/77, loss: 0.00278334878385067 ================================================== Loss: 0.013 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Chinese: {'template': {'p': 0.9583333333333334, 'r': 0.5476190476190477, 'f1': 0.696969696969697}, 'slot': {'p': 0.6341463414634146, 'r': 0.022336769759450172, 'f1': 0.043153526970954356}, 'combined': 0.030076700616119705, 'epoch': 16} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Korean: {'template': {'p': 0.9583333333333334, 'r': 0.5476190476190477, 'f1': 0.696969696969697}, 'slot': {'p': 0.625, 'r': 0.02147766323024055, 'f1': 0.04152823920265781}, 'combined': 0.028943924292761505, 'epoch': 16} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 16} Test Russian: {'template': {'p': 0.9583333333333334, 'r': 0.5476190476190477, 'f1': 0.696969696969697}, 'slot': {'p': 0.6341463414634146, 'r': 0.022336769759450172, 'f1': 0.043153526970954356}, 'combined': 0.030076700616119705, 'epoch': 16} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 16} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 16} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 16} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 17 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:37:11.221023: step: 4/77, loss: 0.002295812126249075 2023-01-22 08:37:12.472536: step: 8/77, loss: 0.011710532009601593 2023-01-22 08:37:13.769714: step: 12/77, loss: 2.3140978555602487e-06 2023-01-22 08:37:15.072669: step: 16/77, loss: 2.1024964098614873e-06 2023-01-22 08:37:16.377075: step: 20/77, loss: 0.0002727890096139163 2023-01-22 08:37:17.663452: step: 24/77, loss: 6.954662239877507e-05 2023-01-22 08:37:18.998769: step: 28/77, loss: 0.0008974984521046281 2023-01-22 08:37:20.283613: step: 32/77, loss: 2.525923446228262e-05 2023-01-22 08:37:21.591578: step: 36/77, loss: 0.042360275983810425 2023-01-22 08:37:22.908709: step: 40/77, loss: 0.00043919257586821914 2023-01-22 08:37:24.172777: step: 44/77, loss: 0.001692838268354535 2023-01-22 08:37:25.466924: step: 48/77, loss: 0.00035355580621398985 2023-01-22 08:37:26.767331: step: 52/77, loss: 0.0008009643061086535 2023-01-22 08:37:28.071687: step: 56/77, loss: 0.0065523674711585045 2023-01-22 08:37:29.379685: step: 60/77, loss: 0.01772763580083847 2023-01-22 08:37:30.655935: step: 64/77, loss: 0.001366381999105215 2023-01-22 08:37:32.010170: step: 68/77, loss: 0.07654806971549988 2023-01-22 08:37:33.321143: step: 72/77, loss: 1.4059327440918423e-05 2023-01-22 08:37:34.638512: step: 76/77, loss: 0.016522567719221115 2023-01-22 08:37:35.940008: step: 80/77, loss: 0.008804031647741795 2023-01-22 08:37:37.289081: step: 84/77, loss: 0.009108972735702991 2023-01-22 08:37:38.670030: step: 88/77, loss: 0.0046868640929460526 2023-01-22 08:37:40.041040: step: 92/77, loss: 0.087021104991436 2023-01-22 08:37:41.323490: step: 96/77, loss: 3.298327646916732e-05 2023-01-22 08:37:42.675005: step: 100/77, loss: 0.014686529524624348 2023-01-22 08:37:43.930592: step: 104/77, loss: 7.930253559607081e-06 2023-01-22 08:37:45.194842: step: 108/77, loss: 0.011731461621820927 2023-01-22 08:37:46.477354: step: 112/77, loss: 0.0001062441078829579 2023-01-22 08:37:47.837269: step: 116/77, loss: 0.024497399106621742 2023-01-22 08:37:49.206245: step: 120/77, loss: 0.01568659022450447 2023-01-22 08:37:50.494950: step: 124/77, loss: 0.04065697267651558 2023-01-22 08:37:51.800548: step: 128/77, loss: 0.004619527142494917 2023-01-22 08:37:53.112986: step: 132/77, loss: 2.9057164852019923e-07 2023-01-22 08:37:54.376136: step: 136/77, loss: 2.6861227524932474e-05 2023-01-22 08:37:55.682538: step: 140/77, loss: 0.00025511058629490435 2023-01-22 08:37:56.990681: step: 144/77, loss: 0.017772279679775238 2023-01-22 08:37:58.282578: step: 148/77, loss: 0.0006557226879522204 2023-01-22 08:37:59.583683: step: 152/77, loss: 0.010482666082680225 2023-01-22 08:38:00.882255: step: 156/77, loss: 0.13506212830543518 2023-01-22 08:38:02.181170: step: 160/77, loss: 0.19042086601257324 2023-01-22 08:38:03.458180: step: 164/77, loss: 
0.0016938840271905065 2023-01-22 08:38:04.781338: step: 168/77, loss: 0.002290728036314249 2023-01-22 08:38:06.103870: step: 172/77, loss: 0.0020732858683913946 2023-01-22 08:38:07.383868: step: 176/77, loss: 0.0001949071593116969 2023-01-22 08:38:08.677323: step: 180/77, loss: 0.00787076260894537 2023-01-22 08:38:10.026968: step: 184/77, loss: 5.562337719311472e-06 2023-01-22 08:38:11.329605: step: 188/77, loss: 4.0657974750502035e-05 2023-01-22 08:38:12.627568: step: 192/77, loss: 0.06441272795200348 2023-01-22 08:38:13.921606: step: 196/77, loss: 0.009098620153963566 2023-01-22 08:38:15.202837: step: 200/77, loss: 0.002766120946034789 2023-01-22 08:38:16.541307: step: 204/77, loss: 0.029924781993031502 2023-01-22 08:38:17.862754: step: 208/77, loss: 0.000314296135911718 2023-01-22 08:38:19.202371: step: 212/77, loss: 0.006736287847161293 2023-01-22 08:38:20.450295: step: 216/77, loss: 0.02019321545958519 2023-01-22 08:38:21.753066: step: 220/77, loss: 0.005796336568892002 2023-01-22 08:38:23.062301: step: 224/77, loss: 0.0067178416065871716 2023-01-22 08:38:24.411396: step: 228/77, loss: 0.00044596640509553254 2023-01-22 08:38:25.737944: step: 232/77, loss: 0.028060777112841606 2023-01-22 08:38:26.992625: step: 236/77, loss: 0.00021894060773774981 2023-01-22 08:38:28.316456: step: 240/77, loss: 1.1538486432982609e-05 2023-01-22 08:38:29.609610: step: 244/77, loss: 0.01234140433371067 2023-01-22 08:38:30.816295: step: 248/77, loss: 0.0018924312898889184 2023-01-22 08:38:32.145314: step: 252/77, loss: 0.048851627856492996 2023-01-22 08:38:33.434828: step: 256/77, loss: 0.0031984003726392984 2023-01-22 08:38:34.672621: step: 260/77, loss: 0.008823526091873646 2023-01-22 08:38:35.984981: step: 264/77, loss: 0.016866758465766907 2023-01-22 08:38:37.337612: step: 268/77, loss: 0.014881649054586887 2023-01-22 08:38:38.641630: step: 272/77, loss: 9.640722419135273e-05 2023-01-22 08:38:39.953876: step: 276/77, loss: 0.05817005783319473 2023-01-22 08:38:41.261331: step: 280/77, loss: 0.01585889607667923 2023-01-22 08:38:42.554775: step: 284/77, loss: 0.015098603442311287 2023-01-22 08:38:43.887761: step: 288/77, loss: 0.054936982691287994 2023-01-22 08:38:45.205224: step: 292/77, loss: 0.11156722158193588 2023-01-22 08:38:46.494041: step: 296/77, loss: 0.013453776948153973 2023-01-22 08:38:47.767695: step: 300/77, loss: 0.003620242001488805 2023-01-22 08:38:49.023897: step: 304/77, loss: 0.004386755637824535 2023-01-22 08:38:50.335052: step: 308/77, loss: 9.987157682189718e-05 2023-01-22 08:38:51.648820: step: 312/77, loss: 0.019409600645303726 2023-01-22 08:38:52.961360: step: 316/77, loss: 0.003329006489366293 2023-01-22 08:38:54.276536: step: 320/77, loss: 0.00030134027474559844 2023-01-22 08:38:55.562776: step: 324/77, loss: 1.3888611647416838e-05 2023-01-22 08:38:56.860869: step: 328/77, loss: 0.04591492563486099 2023-01-22 08:38:58.123639: step: 332/77, loss: 0.0008835216867737472 2023-01-22 08:38:59.419899: step: 336/77, loss: 0.0014211467932909727 2023-01-22 08:39:00.720141: step: 340/77, loss: 0.005028215236961842 2023-01-22 08:39:02.031583: step: 344/77, loss: 0.0277964249253273 2023-01-22 08:39:03.372752: step: 348/77, loss: 0.002034218981862068 2023-01-22 08:39:04.655805: step: 352/77, loss: 2.3846632757340558e-05 2023-01-22 08:39:06.004537: step: 356/77, loss: 0.00022707131574861705 2023-01-22 08:39:07.344539: step: 360/77, loss: 0.0015117726288735867 2023-01-22 08:39:08.641373: step: 364/77, loss: 0.006705767475068569 2023-01-22 08:39:09.970433: step: 368/77, loss: 
0.0004414547875057906 2023-01-22 08:39:11.285921: step: 372/77, loss: 3.6705201637232676e-05 2023-01-22 08:39:12.575669: step: 376/77, loss: 0.002922557760030031 2023-01-22 08:39:13.918244: step: 380/77, loss: 0.0046601551584899426 2023-01-22 08:39:15.288121: step: 384/77, loss: 0.022119011729955673 2023-01-22 08:39:16.607904: step: 388/77, loss: 0.030487284064292908 ================================================== Loss: 0.016 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04988944951527864, 'epoch': 17} Test Chinese: {'template': {'p': 0.9577464788732394, 'r': 0.5396825396825397, 'f1': 0.6903553299492384}, 'slot': {'p': 0.5806451612903226, 'r': 0.015463917525773196, 'f1': 0.030125523012552297}, 'combined': 0.020797315379223916, 'epoch': 17} Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04988944951527864, 'epoch': 17} Test Korean: {'template': {'p': 0.9577464788732394, 'r': 0.5396825396825397, 'f1': 0.6903553299492384}, 'slot': {'p': 0.5588235294117647, 'r': 0.01632302405498282, 'f1': 0.03171953255425709}, 'combined': 0.021897748362329765, 'epoch': 17} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.04988944951527864, 'epoch': 17} Test Russian: {'template': {'p': 0.9583333333333334, 'r': 0.5476190476190477, 'f1': 0.696969696969697}, 'slot': {'p': 0.5806451612903226, 'r': 0.015463917525773196, 'f1': 0.030125523012552297}, 'combined': 0.02099657664511221, 'epoch': 17} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 17} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 17} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 17} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': 
{'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 18 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:40:56.880673: step: 4/77, loss: 0.00043394678505137563 2023-01-22 08:40:58.187765: step: 8/77, loss: 0.024783240631222725 2023-01-22 08:40:59.467324: step: 12/77, loss: 0.030805286020040512 2023-01-22 08:41:00.732994: step: 16/77, loss: 0.0003471440286375582 2023-01-22 08:41:02.032936: step: 20/77, loss: 0.014186517335474491 2023-01-22 08:41:03.284132: step: 24/77, loss: 2.8209165975567885e-05 2023-01-22 08:41:04.573944: step: 28/77, loss: 0.017384840175509453 2023-01-22 08:41:05.866072: step: 32/77, loss: 0.013848107308149338 2023-01-22 08:41:07.150633: step: 36/77, loss: 0.018242739140987396 2023-01-22 08:41:08.494572: step: 40/77, loss: 0.017047669738531113 2023-01-22 08:41:09.822056: step: 44/77, loss: 2.3446407794835977e-05 2023-01-22 08:41:11.131729: step: 48/77, loss: 0.0007906182436272502 2023-01-22 08:41:12.472746: step: 52/77, loss: 4.537371569313109e-05 2023-01-22 08:41:13.763645: step: 56/77, loss: 0.0015261220978572965 2023-01-22 08:41:15.073359: step: 60/77, loss: 0.03159713000059128 2023-01-22 08:41:16.392353: step: 64/77, loss: 0.0020535080693662167 2023-01-22 08:41:17.724123: step: 68/77, loss: 0.02969714067876339 2023-01-22 08:41:19.011861: step: 72/77, loss: 0.000775107997469604 2023-01-22 08:41:20.331382: step: 76/77, loss: 0.005453579593449831 2023-01-22 08:41:21.645736: step: 80/77, loss: 0.007755403406918049 2023-01-22 08:41:22.904293: step: 84/77, loss: 0.00011459521192591637 2023-01-22 08:41:24.221655: step: 88/77, loss: 9.09103164303815e-06 2023-01-22 08:41:25.483947: step: 92/77, loss: 0.0023775151930749416 2023-01-22 08:41:26.780616: step: 96/77, loss: 0.023311413824558258 2023-01-22 08:41:28.108390: step: 100/77, loss: 0.0037164664827287197 2023-01-22 08:41:29.431159: step: 104/77, loss: 0.04141361266374588 2023-01-22 08:41:30.784151: step: 108/77, loss: 0.015981419011950493 2023-01-22 08:41:32.106093: step: 112/77, loss: 4.68757571070455e-06 2023-01-22 08:41:33.392596: step: 116/77, loss: 2.9949942472740076e-06 2023-01-22 08:41:34.699410: step: 120/77, loss: 9.493254765402526e-05 2023-01-22 08:41:35.983354: step: 124/77, loss: 0.018639886751770973 2023-01-22 08:41:37.212902: step: 128/77, loss: 0.05404244363307953 2023-01-22 08:41:38.490519: step: 132/77, loss: 1.4669490155938547e-05 2023-01-22 08:41:39.780721: step: 136/77, loss: 0.008017289452254772 2023-01-22 08:41:41.081327: step: 140/77, loss: 0.0006571430712938309 2023-01-22 08:41:42.406462: step: 144/77, loss: 0.0001994997583096847 2023-01-22 08:41:43.717081: step: 148/77, loss: 0.0187743678689003 2023-01-22 08:41:44.976157: step: 152/77, loss: 0.0399123877286911 
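Note: across epochs 12-18 the "Current best result" block keeps reporting epoch 2, which is consistent with the best checkpoint being chosen by the dev "combined" score (tracked per language) and none of the later epochs beating it. A hedged sketch of that bookkeeping, using values taken from the log; the helper itself is illustrative and not from this codebase:

# Keep whichever epoch has the highest dev "combined" score, mirroring how the
# "Current best result" block above stays at epoch 2.
best = {"combined": float("-inf"), "epoch": None}

def update_best(dev_result: dict) -> dict:
    """dev_result follows the log's layout, e.g. {'combined': 0.0518, 'epoch': 12}."""
    global best
    if dev_result["combined"] > best["combined"]:
        best = dev_result
    return best

update_best({"combined": 0.05179909351586346, "epoch": 2})
update_best({"combined": 0.048482119404105226, "epoch": 13})   # lower, so epoch 2 stays best
print(best)                                                    # {'combined': 0.05179909351586346, 'epoch': 2}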
2023-01-22 08:41:46.254373: step: 156/77, loss: 0.005981434136629105 2023-01-22 08:41:47.526919: step: 160/77, loss: 0.001792241120710969 2023-01-22 08:41:48.838990: step: 164/77, loss: 0.0022695541847497225 2023-01-22 08:41:50.133042: step: 168/77, loss: 0.00027215006412006915 2023-01-22 08:41:51.449625: step: 172/77, loss: 0.0007794547127559781 2023-01-22 08:41:52.739510: step: 176/77, loss: 5.349092680262402e-05 2023-01-22 08:41:54.048225: step: 180/77, loss: 0.00024154865241143852 2023-01-22 08:41:55.435663: step: 184/77, loss: 0.042345404624938965 2023-01-22 08:41:56.738663: step: 188/77, loss: 0.0012398757971823215 2023-01-22 08:41:58.037758: step: 192/77, loss: 0.022443875670433044 2023-01-22 08:41:59.376290: step: 196/77, loss: 2.1823982024216093e-05 2023-01-22 08:42:00.728887: step: 200/77, loss: 1.095620700652944e-05 2023-01-22 08:42:02.012004: step: 204/77, loss: 0.0005885653663426638 2023-01-22 08:42:03.342060: step: 208/77, loss: 0.00010669405310181901 2023-01-22 08:42:04.620807: step: 212/77, loss: 1.8998762243427336e-06 2023-01-22 08:42:05.891302: step: 216/77, loss: 0.021896017715334892 2023-01-22 08:42:07.195552: step: 220/77, loss: 0.00041468662675470114 2023-01-22 08:42:08.509217: step: 224/77, loss: 0.02845904417335987 2023-01-22 08:42:09.757172: step: 228/77, loss: 1.4626778465753887e-05 2023-01-22 08:42:11.074813: step: 232/77, loss: 1.9547209376469254e-05 2023-01-22 08:42:12.356554: step: 236/77, loss: 0.003462222870439291 2023-01-22 08:42:13.668006: step: 240/77, loss: 0.00018325488781556487 2023-01-22 08:42:14.996325: step: 244/77, loss: 0.000745791126973927 2023-01-22 08:42:16.330557: step: 248/77, loss: 5.230200486039394e-07 2023-01-22 08:42:17.610852: step: 252/77, loss: 0.00013812491670250893 2023-01-22 08:42:18.912756: step: 256/77, loss: 9.543041232973337e-05 2023-01-22 08:42:20.214155: step: 260/77, loss: 2.5750792701728642e-05 2023-01-22 08:42:21.510864: step: 264/77, loss: 7.58459500502795e-05 2023-01-22 08:42:22.758697: step: 268/77, loss: 1.9997112303826725e-06 2023-01-22 08:42:24.016122: step: 272/77, loss: 3.58948955181404e-06 2023-01-22 08:42:25.294434: step: 276/77, loss: 7.18016917744535e-06 2023-01-22 08:42:26.584468: step: 280/77, loss: 0.0010476586176082492 2023-01-22 08:42:27.914947: step: 284/77, loss: 0.08216924965381622 2023-01-22 08:42:29.202574: step: 288/77, loss: 0.0045933956280350685 2023-01-22 08:42:30.522156: step: 292/77, loss: 0.02022452838718891 2023-01-22 08:42:31.883652: step: 296/77, loss: 3.2676407499820925e-06 2023-01-22 08:42:33.087428: step: 300/77, loss: 8.142985461745411e-05 2023-01-22 08:42:34.432933: step: 304/77, loss: 6.297406798694283e-05 2023-01-22 08:42:35.753814: step: 308/77, loss: 4.044241723022424e-05 2023-01-22 08:42:37.041698: step: 312/77, loss: 0.009998363442718983 2023-01-22 08:42:38.346214: step: 316/77, loss: 0.0004960843361914158 2023-01-22 08:42:39.681343: step: 320/77, loss: 2.751972488113097e-06 2023-01-22 08:42:40.999430: step: 324/77, loss: 0.00029293610714375973 2023-01-22 08:42:42.282678: step: 328/77, loss: 0.021552830934524536 2023-01-22 08:42:43.654824: step: 332/77, loss: 0.021268876269459724 2023-01-22 08:42:44.951672: step: 336/77, loss: 0.0005264312494546175 2023-01-22 08:42:46.281011: step: 340/77, loss: 9.673903150542174e-06 2023-01-22 08:42:47.590274: step: 344/77, loss: 0.020955218002200127 2023-01-22 08:42:48.908878: step: 348/77, loss: 3.5274133551865816e-05 2023-01-22 08:42:50.181755: step: 352/77, loss: 1.1457627806521486e-05 2023-01-22 08:42:51.435670: step: 356/77, loss: 
0.0015478396089747548 2023-01-22 08:42:52.746442: step: 360/77, loss: 9.202599903801456e-05 2023-01-22 08:42:54.091669: step: 364/77, loss: 0.0002595757250674069 2023-01-22 08:42:55.383124: step: 368/77, loss: 0.00846023764461279 2023-01-22 08:42:56.706991: step: 372/77, loss: 0.0002929982729256153 2023-01-22 08:42:57.968476: step: 376/77, loss: 0.004742179997265339 2023-01-22 08:42:59.279236: step: 380/77, loss: 0.00033104015165008605 2023-01-22 08:43:00.586922: step: 384/77, loss: 0.003096122294664383 2023-01-22 08:43:01.881851: step: 388/77, loss: 4.091663868166506e-06 ================================================== Loss: 0.008 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5166666666666667, 'f1': 0.6813186813186815}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.043411455800836336, 'epoch': 18} Test Chinese: {'template': {'p': 0.9696969696969697, 'r': 0.5079365079365079, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6190476190476191, 'r': 0.022336769759450172, 'f1': 0.04311774461028192}, 'combined': 0.02874516307352128, 'epoch': 18} Dev Korean: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.0443247402847249, 'epoch': 18} Test Korean: {'template': {'p': 0.9696969696969697, 'r': 0.5079365079365079, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6190476190476191, 'r': 0.022336769759450172, 'f1': 0.04311774461028192}, 'combined': 0.02874516307352128, 'epoch': 18} Dev Russian: {'template': {'p': 1.0, 'r': 0.5166666666666667, 'f1': 0.6813186813186815}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.043411455800836336, 'epoch': 18} Test Russian: {'template': {'p': 0.9701492537313433, 'r': 0.5158730158730159, 'f1': 0.6735751295336788}, 'slot': {'p': 0.6, 'r': 0.020618556701030927, 'f1': 0.03986710963455149}, 'combined': 0.026853493536226396, 'epoch': 18} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 18} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 18} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 
0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 19 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:44:42.257886: step: 4/77, loss: 0.08355798572301865 2023-01-22 08:44:43.544264: step: 8/77, loss: 0.01619032397866249 2023-01-22 08:44:44.866182: step: 12/77, loss: 0.006282666698098183 2023-01-22 08:44:46.142467: step: 16/77, loss: 0.04044962674379349 2023-01-22 08:44:47.401327: step: 20/77, loss: 2.1754760837211506e-06 2023-01-22 08:44:48.696385: step: 24/77, loss: 0.001033884473145008 2023-01-22 08:44:50.032166: step: 28/77, loss: 0.0010141909588128328 2023-01-22 08:44:51.309659: step: 32/77, loss: 0.0029271808452904224 2023-01-22 08:44:52.605863: step: 36/77, loss: 1.6010548279155046e-05 2023-01-22 08:44:53.889230: step: 40/77, loss: 0.029112864285707474 2023-01-22 08:44:55.159095: step: 44/77, loss: 4.747176262753783e-06 2023-01-22 08:44:56.449243: step: 48/77, loss: 3.406681207707152e-05 2023-01-22 08:44:57.735910: step: 52/77, loss: 0.0006297418149188161 2023-01-22 08:44:59.036958: step: 56/77, loss: 5.055548172094859e-06 2023-01-22 08:45:00.322241: step: 60/77, loss: 4.45558616775088e-05 2023-01-22 08:45:01.678842: step: 64/77, loss: 1.1200027074664831e-05 2023-01-22 08:45:02.999407: step: 68/77, loss: 0.0011179175926372409 2023-01-22 08:45:04.323016: step: 72/77, loss: 8.404656546190381e-05 2023-01-22 08:45:05.635115: step: 76/77, loss: 0.0016348809003829956 2023-01-22 08:45:06.945836: step: 80/77, loss: 0.0011514124926179647 2023-01-22 08:45:08.228637: step: 84/77, loss: 0.04622327536344528 2023-01-22 08:45:09.518874: step: 88/77, loss: 0.01025646273046732 2023-01-22 08:45:10.866419: step: 92/77, loss: 8.574593084631488e-05 2023-01-22 08:45:12.232725: step: 96/77, loss: 0.0008810489089228213 2023-01-22 08:45:13.488617: step: 100/77, loss: 0.20770668983459473 2023-01-22 08:45:14.816280: step: 104/77, loss: 0.025355808436870575 2023-01-22 08:45:16.159363: step: 108/77, loss: 0.00010351461969548836 2023-01-22 08:45:17.463281: step: 112/77, loss: 0.0005154706886969507 2023-01-22 08:45:18.746300: step: 116/77, loss: 0.00017362600192427635 2023-01-22 08:45:20.043874: step: 120/77, loss: 4.7918918426148593e-05 2023-01-22 08:45:21.318032: step: 124/77, loss: 1.1120209819637239e-05 2023-01-22 08:45:22.612929: step: 128/77, loss: 5.7865377129928675e-06 2023-01-22 08:45:23.909178: step: 132/77, loss: 0.0011631065281108022 2023-01-22 08:45:25.163472: step: 136/77, loss: 0.0003213899035472423 2023-01-22 08:45:26.432742: step: 140/77, loss: 0.016531798988580704 2023-01-22 
08:45:27.736685: step: 144/77, loss: 0.0002846270508598536 2023-01-22 08:45:29.087224: step: 148/77, loss: 5.7959747209679335e-05 2023-01-22 08:45:30.437630: step: 152/77, loss: 0.001835025497712195 2023-01-22 08:45:31.795754: step: 156/77, loss: 7.920786447357386e-05 2023-01-22 08:45:33.128966: step: 160/77, loss: 0.03611599653959274 2023-01-22 08:45:34.437499: step: 164/77, loss: 0.024288857355713844 2023-01-22 08:45:35.749373: step: 168/77, loss: 0.00010517123155295849 2023-01-22 08:45:37.061914: step: 172/77, loss: 0.001092438818886876 2023-01-22 08:45:38.333872: step: 176/77, loss: 0.007640526629984379 2023-01-22 08:45:39.648295: step: 180/77, loss: 0.0007359448936767876 2023-01-22 08:45:40.971657: step: 184/77, loss: 1.0519408533582464e-05 2023-01-22 08:45:42.311013: step: 188/77, loss: 0.00011256665311520919 2023-01-22 08:45:43.639492: step: 192/77, loss: 0.0020343991927802563 2023-01-22 08:45:44.932883: step: 196/77, loss: 2.485430059095961e-06 2023-01-22 08:45:46.265423: step: 200/77, loss: 0.0034714534413069487 2023-01-22 08:45:47.601301: step: 204/77, loss: 0.17109277844429016 2023-01-22 08:45:48.887401: step: 208/77, loss: 0.002324912929907441 2023-01-22 08:45:50.184395: step: 212/77, loss: 0.013834369368851185 2023-01-22 08:45:51.525343: step: 216/77, loss: 0.004987460561096668 2023-01-22 08:45:52.813344: step: 220/77, loss: 0.00308143999427557 2023-01-22 08:45:54.125292: step: 224/77, loss: 0.00012807335588149726 2023-01-22 08:45:55.412741: step: 228/77, loss: 0.00022715324303135276 2023-01-22 08:45:56.720535: step: 232/77, loss: 0.00017162026779260486 2023-01-22 08:45:58.016311: step: 236/77, loss: 0.003213282907381654 2023-01-22 08:45:59.292411: step: 240/77, loss: 0.0026016314513981342 2023-01-22 08:46:00.607063: step: 244/77, loss: 0.0205234307795763 2023-01-22 08:46:01.895848: step: 248/77, loss: 0.0006555135478265584 2023-01-22 08:46:03.282076: step: 252/77, loss: 0.0015501710586249828 2023-01-22 08:46:04.561064: step: 256/77, loss: 0.07120595127344131 2023-01-22 08:46:05.890209: step: 260/77, loss: 0.04038810729980469 2023-01-22 08:46:07.155046: step: 264/77, loss: 0.005638515576720238 2023-01-22 08:46:08.468327: step: 268/77, loss: 0.014268961735069752 2023-01-22 08:46:09.795541: step: 272/77, loss: 2.5206656573573127e-05 2023-01-22 08:46:11.086966: step: 276/77, loss: 9.096400026464835e-05 2023-01-22 08:46:12.439599: step: 280/77, loss: 0.000966939958743751 2023-01-22 08:46:13.791957: step: 284/77, loss: 0.008116503246128559 2023-01-22 08:46:15.098642: step: 288/77, loss: 0.007118896581232548 2023-01-22 08:46:16.423896: step: 292/77, loss: 2.1705160179408267e-05 2023-01-22 08:46:17.761942: step: 296/77, loss: 0.0001560598029755056 2023-01-22 08:46:19.032476: step: 300/77, loss: 9.638090705266222e-05 2023-01-22 08:46:20.348465: step: 304/77, loss: 0.0005366081604734063 2023-01-22 08:46:21.648231: step: 308/77, loss: 0.0001767091453075409 2023-01-22 08:46:22.906788: step: 312/77, loss: 4.1324805351905525e-05 2023-01-22 08:46:24.194493: step: 316/77, loss: 0.0008603700553067029 2023-01-22 08:46:25.484389: step: 320/77, loss: 9.919640433508903e-05 2023-01-22 08:46:26.816166: step: 324/77, loss: 0.0005278933676891029 2023-01-22 08:46:28.109265: step: 328/77, loss: 0.00010486481914995238 2023-01-22 08:46:29.482744: step: 332/77, loss: 0.0002847413707058877 2023-01-22 08:46:30.779700: step: 336/77, loss: 4.130376692046411e-05 2023-01-22 08:46:32.111794: step: 340/77, loss: 0.004138815216720104 2023-01-22 08:46:33.430339: step: 344/77, loss: 1.7732038486428792e-06 2023-01-22 
08:46:34.753468: step: 348/77, loss: 0.0001266787585336715 2023-01-22 08:46:36.060739: step: 352/77, loss: 0.0001363552873954177 2023-01-22 08:46:37.385146: step: 356/77, loss: 0.0012341360561549664 2023-01-22 08:46:38.737118: step: 360/77, loss: 0.00014551937056239694 2023-01-22 08:46:40.093895: step: 364/77, loss: 0.0005572434747591615 2023-01-22 08:46:41.454169: step: 368/77, loss: 0.00889385025948286 2023-01-22 08:46:42.730685: step: 372/77, loss: 5.846058229508344e-06 2023-01-22 08:46:44.009399: step: 376/77, loss: 0.00043951894622296095 2023-01-22 08:46:45.289004: step: 380/77, loss: 0.001410671859048307 2023-01-22 08:46:46.598295: step: 384/77, loss: 0.007698127068579197 2023-01-22 08:46:47.884712: step: 388/77, loss: 0.00037329865153878927 ================================================== Loss: 0.010 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.0443247402847249, 'epoch': 19} Test Chinese: {'template': {'p': 0.9420289855072463, 'r': 0.5158730158730159, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5833333333333334, 'r': 0.01804123711340206, 'f1': 0.03499999999999999}, 'combined': 0.023333333333333324, 'epoch': 19} Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 19} Test Korean: {'template': {'p': 0.9420289855072463, 'r': 0.5158730158730159, 'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.01804123711340206, 'f1': 0.035029190992493735}, 'combined': 0.023352793994995822, 'epoch': 19} Dev Russian: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.0443247402847249, 'epoch': 19} Test Russian: {'template': {'p': 0.9420289855072463, 'r': 0.5158730158730159, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5757575757575758, 'r': 0.01632302405498282, 'f1': 0.031746031746031744}, 'combined': 0.021164021164021163, 'epoch': 19} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 19} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 19} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 19} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 
0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 20 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:48:28.515162: step: 4/77, loss: 0.002364807529374957 2023-01-22 08:48:29.812297: step: 8/77, loss: 0.0034368294291198254 2023-01-22 08:48:31.099727: step: 12/77, loss: 1.7614825992495753e-05 2023-01-22 08:48:32.384211: step: 16/77, loss: 2.296705133630894e-05 2023-01-22 08:48:33.679994: step: 20/77, loss: 1.5452326351805823e-06 2023-01-22 08:48:34.938676: step: 24/77, loss: 0.0002031271142186597 2023-01-22 08:48:36.186866: step: 28/77, loss: 0.00014223124890122563 2023-01-22 08:48:37.461510: step: 32/77, loss: 0.0011827124981209636 2023-01-22 08:48:38.797952: step: 36/77, loss: 0.002860462525859475 2023-01-22 08:48:40.078901: step: 40/77, loss: 5.542115104617551e-06 2023-01-22 08:48:41.406867: step: 44/77, loss: 0.009848167188465595 2023-01-22 08:48:42.692341: step: 48/77, loss: 0.008947617374360561 2023-01-22 08:48:44.021502: step: 52/77, loss: 0.00013507247786037624 2023-01-22 08:48:45.313665: step: 56/77, loss: 0.007452580612152815 2023-01-22 08:48:46.648950: step: 60/77, loss: 1.636518572922796e-05 2023-01-22 08:48:47.896372: step: 64/77, loss: 0.00012901745503768325 2023-01-22 08:48:49.207887: step: 68/77, loss: 7.399632158922032e-05 2023-01-22 08:48:50.499231: step: 72/77, loss: 0.020840534940361977 2023-01-22 08:48:51.820542: step: 76/77, loss: 0.13874861598014832 2023-01-22 08:48:53.104795: step: 80/77, loss: 0.0004687589535024017 2023-01-22 08:48:54.427959: step: 84/77, loss: 0.00034775465610437095 2023-01-22 08:48:55.714610: step: 88/77, loss: 0.0005079308757558465 2023-01-22 08:48:57.013900: step: 92/77, loss: 0.01079073827713728 2023-01-22 08:48:58.294508: step: 96/77, loss: 0.0005955763044767082 2023-01-22 08:48:59.557375: step: 100/77, loss: 0.007185032125562429 2023-01-22 08:49:00.793973: step: 104/77, loss: 0.0006630965508520603 2023-01-22 08:49:02.179828: step: 108/77, loss: 6.602683424716815e-05 2023-01-22 08:49:03.463684: step: 112/77, loss: 4.022960638394579e-05 2023-01-22 08:49:04.749838: step: 116/77, loss: 0.00010086910333484411 2023-01-22 08:49:06.113446: step: 120/77, loss: 0.01682485081255436 2023-01-22 08:49:07.388811: step: 124/77, loss: 3.7639067613781663e-06 2023-01-22 08:49:08.710016: step: 128/77, loss: 3.7704110582126305e-05 
2023-01-22 08:49:10.012800: step: 132/77, loss: 0.0004095855401828885 2023-01-22 08:49:11.307616: step: 136/77, loss: 0.002314388519152999 2023-01-22 08:49:12.567142: step: 140/77, loss: 0.001917900750413537 2023-01-22 08:49:13.862496: step: 144/77, loss: 0.00695814611390233 2023-01-22 08:49:15.130036: step: 148/77, loss: 0.00012202710058772936 2023-01-22 08:49:16.438516: step: 152/77, loss: 0.0001240679412148893 2023-01-22 08:49:17.759598: step: 156/77, loss: 5.034492915001465e-06 2023-01-22 08:49:19.062052: step: 160/77, loss: 0.00017982257122639567 2023-01-22 08:49:20.389508: step: 164/77, loss: 0.006036675069481134 2023-01-22 08:49:21.670956: step: 168/77, loss: 0.0010000867769122124 2023-01-22 08:49:22.961098: step: 172/77, loss: 0.00018073295359499753 2023-01-22 08:49:24.298199: step: 176/77, loss: 4.4574870116775855e-05 2023-01-22 08:49:25.596154: step: 180/77, loss: 0.05173643305897713 2023-01-22 08:49:26.920648: step: 184/77, loss: 0.0015796958468854427 2023-01-22 08:49:28.234697: step: 188/77, loss: 0.0010724187595769763 2023-01-22 08:49:29.528722: step: 192/77, loss: 0.004048222675919533 2023-01-22 08:49:30.810545: step: 196/77, loss: 0.009710166603326797 2023-01-22 08:49:32.119603: step: 200/77, loss: 0.028478579595685005 2023-01-22 08:49:33.426540: step: 204/77, loss: 7.516022014897317e-05 2023-01-22 08:49:34.751879: step: 208/77, loss: 0.0027792761102318764 2023-01-22 08:49:36.074395: step: 212/77, loss: 8.034476195462048e-05 2023-01-22 08:49:37.408049: step: 216/77, loss: 9.811624295252841e-06 2023-01-22 08:49:38.684721: step: 220/77, loss: 0.003008501837030053 2023-01-22 08:49:40.011909: step: 224/77, loss: 1.222667378897313e-05 2023-01-22 08:49:41.357781: step: 228/77, loss: 0.0003688423312269151 2023-01-22 08:49:42.664487: step: 232/77, loss: 0.002369387773796916 2023-01-22 08:49:43.993075: step: 236/77, loss: 0.0002195181732531637 2023-01-22 08:49:45.311617: step: 240/77, loss: 0.005662532523274422 2023-01-22 08:49:46.586049: step: 244/77, loss: 1.3159821719455067e-05 2023-01-22 08:49:47.929221: step: 248/77, loss: 0.007855149917304516 2023-01-22 08:49:49.208840: step: 252/77, loss: 0.00671668816357851 2023-01-22 08:49:50.504873: step: 256/77, loss: 0.02250758931040764 2023-01-22 08:49:51.801768: step: 260/77, loss: 0.00010617719090078026 2023-01-22 08:49:53.102360: step: 264/77, loss: 0.019556432962417603 2023-01-22 08:49:54.405635: step: 268/77, loss: 0.001554210321046412 2023-01-22 08:49:55.720200: step: 272/77, loss: 9.581274298398057e-07 2023-01-22 08:49:57.076612: step: 276/77, loss: 0.0031252556946128607 2023-01-22 08:49:58.321275: step: 280/77, loss: 0.022521525621414185 2023-01-22 08:49:59.618931: step: 284/77, loss: 0.005696204490959644 2023-01-22 08:50:00.969740: step: 288/77, loss: 0.00011008291767211631 2023-01-22 08:50:02.260904: step: 292/77, loss: 9.962310286937281e-05 2023-01-22 08:50:03.547953: step: 296/77, loss: 0.008221546187996864 2023-01-22 08:50:04.797490: step: 300/77, loss: 8.898541273083538e-05 2023-01-22 08:50:06.102477: step: 304/77, loss: 3.8327845686580986e-05 2023-01-22 08:50:07.389907: step: 308/77, loss: 0.003969868179410696 2023-01-22 08:50:08.693132: step: 312/77, loss: 0.08264435082674026 2023-01-22 08:50:09.980765: step: 316/77, loss: 0.05371001735329628 2023-01-22 08:50:11.260577: step: 320/77, loss: 5.841174584020337e-07 2023-01-22 08:50:12.553003: step: 324/77, loss: 6.554293941007927e-05 2023-01-22 08:50:13.829764: step: 328/77, loss: 9.15273412829265e-05 2023-01-22 08:50:15.150192: step: 332/77, loss: 0.0001446074602426961 
2023-01-22 08:50:16.444094: step: 336/77, loss: 0.0006172170978970826 2023-01-22 08:50:17.712233: step: 340/77, loss: 0.0031737142708152533 2023-01-22 08:50:18.995856: step: 344/77, loss: 0.00707965437322855 2023-01-22 08:50:20.309832: step: 348/77, loss: 5.047888771514408e-05 2023-01-22 08:50:21.633944: step: 352/77, loss: 7.201795961009338e-05 2023-01-22 08:50:22.909349: step: 356/77, loss: 0.00013962757657282054 2023-01-22 08:50:24.193815: step: 360/77, loss: 0.006290529388934374 2023-01-22 08:50:25.532501: step: 364/77, loss: 0.00016902160132303834 2023-01-22 08:50:26.864471: step: 368/77, loss: 0.06438884884119034 2023-01-22 08:50:28.142318: step: 372/77, loss: 4.708188043878181e-06 2023-01-22 08:50:29.489331: step: 376/77, loss: 0.00014791273861192167 2023-01-22 08:50:30.784861: step: 380/77, loss: 7.04221602063626e-05 2023-01-22 08:50:32.114663: step: 384/77, loss: 7.290163193829358e-05 2023-01-22 08:50:33.399616: step: 388/77, loss: 7.028390245977789e-05 ================================================== Loss: 0.007 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Chinese: {'template': {'p': 0.9459459459459459, 'r': 0.5555555555555556, 'f1': 0.7000000000000001}, 'slot': {'p': 0.5897435897435898, 'r': 0.019759450171821305, 'f1': 0.03823773898586866}, 'combined': 0.026766417290108063, 'epoch': 20} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Korean: {'template': {'p': 0.9466666666666667, 'r': 0.5634920634920635, 'f1': 0.7064676616915423}, 'slot': {'p': 0.6052631578947368, 'r': 0.019759450171821305, 'f1': 0.038269550748752074}, 'combined': 0.027036200031456688, 'epoch': 20} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 20} Test Russian: {'template': {'p': 0.9452054794520548, 'r': 0.5476190476190477, 'f1': 0.6934673366834171}, 'slot': {'p': 0.5675675675675675, 'r': 0.01804123711340206, 'f1': 0.03497085761865112}, 'combined': 0.024251147494340975, 'epoch': 20} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 20} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 20} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 20} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 
'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Korean: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.7241379310344828, 'r': 0.01804123711340206, 'f1': 0.035205364626990775}, 'combined': 0.02334863560753792, 'epoch': 2} Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 21 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:52:13.871545: step: 4/77, loss: 0.0055143265053629875 2023-01-22 08:52:15.184819: step: 8/77, loss: 0.02832634560763836 2023-01-22 08:52:16.474943: step: 12/77, loss: 0.00014384837413672358 2023-01-22 08:52:17.832680: step: 16/77, loss: 0.0036363855469971895 2023-01-22 08:52:19.157619: step: 20/77, loss: 0.0075181955471634865 2023-01-22 08:52:20.447279: step: 24/77, loss: 0.00023805254022590816 2023-01-22 08:52:21.744534: step: 28/77, loss: 0.003992673009634018 2023-01-22 08:52:23.044233: step: 32/77, loss: 0.0005978421540930867 2023-01-22 08:52:24.388440: step: 36/77, loss: 1.4125964753475273e-06 2023-01-22 08:52:25.709174: step: 40/77, loss: 0.0005768768023699522 2023-01-22 08:52:27.009933: step: 44/77, loss: 8.522043935954571e-05 2023-01-22 08:52:28.309362: step: 48/77, loss: 0.0004115917254239321 2023-01-22 08:52:29.638784: step: 52/77, loss: 7.993353392521385e-06 2023-01-22 08:52:30.889806: step: 56/77, loss: 8.447294385405257e-05 2023-01-22 08:52:32.235417: step: 60/77, loss: 3.577906682039611e-05 2023-01-22 08:52:33.562038: step: 64/77, loss: 0.0001947486452991143 2023-01-22 08:52:34.866455: step: 68/77, loss: 0.0004472629225347191 2023-01-22 08:52:36.191651: step: 72/77, loss: 0.00015355259529314935 2023-01-22 08:52:37.530594: step: 76/77, loss: 0.0003373160434421152 2023-01-22 08:52:38.792436: step: 80/77, loss: 9.838744153967127e-05 2023-01-22 08:52:40.099092: step: 84/77, loss: 1.6177429642993957e-05 2023-01-22 08:52:41.373157: step: 88/77, loss: 0.004022568464279175 2023-01-22 08:52:42.626280: step: 92/77, loss: 3.932239269488491e-05 2023-01-22 08:52:43.951172: step: 96/77, loss: 0.001530507463030517 2023-01-22 08:52:45.230303: step: 100/77, loss: 0.003452786011621356 2023-01-22 08:52:46.516411: step: 104/77, loss: 0.005762745160609484 2023-01-22 08:52:47.804913: step: 108/77, loss: 0.004810965154320002 2023-01-22 08:52:49.120280: step: 112/77, loss: 0.00015320284001063555 2023-01-22 
08:52:50.408437: step: 116/77, loss: 5.8151086705038324e-05 2023-01-22 08:52:51.709972: step: 120/77, loss: 4.310378244554158e-06 2023-01-22 08:52:53.036652: step: 124/77, loss: 2.6015195544459857e-06 2023-01-22 08:52:54.360258: step: 128/77, loss: 0.000375100236851722 2023-01-22 08:52:55.664531: step: 132/77, loss: 0.05096989497542381 2023-01-22 08:52:57.011940: step: 136/77, loss: 0.00031754543306306005 2023-01-22 08:52:58.305630: step: 140/77, loss: 0.0001472402800573036 2023-01-22 08:52:59.650206: step: 144/77, loss: 0.0007970663718879223 2023-01-22 08:53:01.001250: step: 148/77, loss: 0.013219388201832771 2023-01-22 08:53:02.263370: step: 152/77, loss: 0.00012928983778692782 2023-01-22 08:53:03.570193: step: 156/77, loss: 3.786048910114914e-05 2023-01-22 08:53:04.842864: step: 160/77, loss: 1.0483473488420714e-05 2023-01-22 08:53:06.140389: step: 164/77, loss: 3.6924047890352085e-05 2023-01-22 08:53:07.469416: step: 168/77, loss: 7.85590509622125e-06 2023-01-22 08:53:08.800440: step: 172/77, loss: 0.043523602187633514 2023-01-22 08:53:10.119567: step: 176/77, loss: 0.0007601756369695067 2023-01-22 08:53:11.448334: step: 180/77, loss: 0.024111945182085037 2023-01-22 08:53:12.762188: step: 184/77, loss: 6.514093911391683e-06 2023-01-22 08:53:14.095959: step: 188/77, loss: 0.0002271834819111973 2023-01-22 08:53:15.444629: step: 192/77, loss: 0.0009495330159552395 2023-01-22 08:53:16.793829: step: 196/77, loss: 1.7016720903484384e-06 2023-01-22 08:53:18.108471: step: 200/77, loss: 9.455228428123519e-05 2023-01-22 08:53:19.432621: step: 204/77, loss: 1.8074692889058497e-06 2023-01-22 08:53:20.778698: step: 208/77, loss: 5.894151854590746e-06 2023-01-22 08:53:22.065620: step: 212/77, loss: 2.3507000150857493e-05 2023-01-22 08:53:23.335510: step: 216/77, loss: 1.6973621313809417e-05 2023-01-22 08:53:24.610097: step: 220/77, loss: 0.022141138091683388 2023-01-22 08:53:25.909364: step: 224/77, loss: 2.8701071641989984e-05 2023-01-22 08:53:27.202254: step: 228/77, loss: 3.997721432824619e-05 2023-01-22 08:53:28.470334: step: 232/77, loss: 3.2146857847692445e-05 2023-01-22 08:53:29.804109: step: 236/77, loss: 1.425421032763552e-05 2023-01-22 08:53:31.091008: step: 240/77, loss: 0.00028763728914782405 2023-01-22 08:53:32.416343: step: 244/77, loss: 0.0006647562840953469 2023-01-22 08:53:33.709865: step: 248/77, loss: 0.0003965311625506729 2023-01-22 08:53:35.047143: step: 252/77, loss: 3.520384052535519e-05 2023-01-22 08:53:36.321384: step: 256/77, loss: 0.0005282217171043158 2023-01-22 08:53:37.608065: step: 260/77, loss: 7.094180546118878e-06 2023-01-22 08:53:38.919293: step: 264/77, loss: 0.0002328925475012511 2023-01-22 08:53:40.243390: step: 268/77, loss: 7.204769644886255e-05 2023-01-22 08:53:41.508029: step: 272/77, loss: 0.0006943023763597012 2023-01-22 08:53:42.791173: step: 276/77, loss: 0.021600721403956413 2023-01-22 08:53:44.059272: step: 280/77, loss: 3.468366776360199e-05 2023-01-22 08:53:45.356975: step: 284/77, loss: 0.0017295520519837737 2023-01-22 08:53:46.670670: step: 288/77, loss: 6.080401362851262e-05 2023-01-22 08:53:48.037117: step: 292/77, loss: 0.004709943663328886 2023-01-22 08:53:49.420922: step: 296/77, loss: 0.04869398847222328 2023-01-22 08:53:50.721027: step: 300/77, loss: 0.000548431882634759 2023-01-22 08:53:52.010600: step: 304/77, loss: 0.00472796568647027 2023-01-22 08:53:53.329394: step: 308/77, loss: 0.011098390445113182 2023-01-22 08:53:54.675468: step: 312/77, loss: 0.00011605924373725429 2023-01-22 08:53:55.978920: step: 316/77, loss: 
0.0022660826798528433 2023-01-22 08:53:57.295503: step: 320/77, loss: 0.0375262089073658 2023-01-22 08:53:58.596813: step: 324/77, loss: 0.0008253601845353842 2023-01-22 08:53:59.891068: step: 328/77, loss: 0.00011603911843849346 2023-01-22 08:54:01.215109: step: 332/77, loss: 4.261704589225701e-07 2023-01-22 08:54:02.506358: step: 336/77, loss: 0.00019668148888740689 2023-01-22 08:54:03.877059: step: 340/77, loss: 0.05273066461086273 2023-01-22 08:54:05.163789: step: 344/77, loss: 7.742470188532025e-05 2023-01-22 08:54:06.415139: step: 348/77, loss: 2.0339364255050896e-06 2023-01-22 08:54:07.749652: step: 352/77, loss: 1.1309716683172155e-06 2023-01-22 08:54:09.049847: step: 356/77, loss: 1.6242236711150326e-07 2023-01-22 08:54:10.368108: step: 360/77, loss: 0.06755227595567703 2023-01-22 08:54:11.674752: step: 364/77, loss: 3.2346313219022704e-06 2023-01-22 08:54:12.997021: step: 368/77, loss: 1.4498581549560186e-05 2023-01-22 08:54:14.259176: step: 372/77, loss: 6.510221282951534e-06 2023-01-22 08:54:15.576738: step: 376/77, loss: 0.010036947205662727 2023-01-22 08:54:16.891037: step: 380/77, loss: 2.9802308176840597e-08 2023-01-22 08:54:18.142594: step: 384/77, loss: 2.184322738685296e-06 2023-01-22 08:54:19.485817: step: 388/77, loss: 0.02168990485370159 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21} Test Chinese: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6666666666666666, 'r': 0.01718213058419244, 'f1': 0.03350083752093803}, 'combined': 0.021863704487349027, 'epoch': 21} Dev Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21} Test Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6333333333333333, 'r': 0.01632302405498282, 'f1': 0.031825795644891124}, 'combined': 0.02077051926298157, 'epoch': 21} Dev Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21} Test Russian: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6296296296296297, 'r': 0.014604810996563574, 'f1': 0.028547439126784216}, 'combined': 0.018630960272217063, 'epoch': 21} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 21} Sample Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} New best korean model... 
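Note on the result dicts: in every logged entry the 'combined' score matches the product of the template F1 and the slot F1 (e.g. Test Korean at epoch 19: 0.6667 x 0.03503 ~ 0.023353), and it collapses to 0.0 whenever the slot F1 is 0.0. The sketch below is a minimal standalone check, assuming the result dicts are printed as Python literals exactly as above; check_combined is a hypothetical helper written for this log, not a function defined in train.py.

```python
# Illustrative only: verify that 'combined' == template_f1 * slot_f1 for the
# result dicts printed in this log. The dicts are valid Python literals, so
# they can be pulled out of a raw log chunk with a regex and literal_eval.
import ast
import re

def check_combined(log_text: str, tol: float = 1e-9) -> None:
    # Each result dict starts at {'template' and ends at 'epoch': N}
    for match in re.finditer(r"\{'template'.*?'epoch': \d+\}", log_text):
        result = ast.literal_eval(match.group(0))
        expected = result['template']['f1'] * result['slot']['f1']
        assert abs(result['combined'] - expected) < tol, (result['combined'], expected)

# Example taken verbatim from the epoch 19 evaluation block above.
check_combined(
    "Test Korean: {'template': {'p': 0.9420289855072463, 'r': 0.5158730158730159, "
    "'f1': 0.6666666666666666}, 'slot': {'p': 0.6, 'r': 0.01804123711340206, "
    "'f1': 0.035029190992493735}, 'combined': 0.023352793994995822, 'epoch': 19}"
)
```

Feeding the other evaluation blocks in this log through the same check should reproduce each logged 'combined' value from its two F1 scores in the same way.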
================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6333333333333333, 'r': 0.01632302405498282, 'f1': 0.031825795644891124}, 'combined': 0.02077051926298157, 'epoch': 21} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 22 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:56:10.654739: step: 4/77, loss: 0.0003420588036533445 2023-01-22 08:56:12.005626: step: 8/77, loss: 0.00033664965303614736 2023-01-22 08:56:13.318633: step: 12/77, loss: 0.015224933624267578 2023-01-22 08:56:14.645937: step: 16/77, loss: 0.008086070418357849 2023-01-22 08:56:15.929557: step: 20/77, loss: 1.0142309292859863e-05 2023-01-22 08:56:17.294435: step: 24/77, loss: 3.63995241059456e-06 2023-01-22 08:56:18.572985: step: 28/77, loss: 0.0006053458782844245 2023-01-22 08:56:19.862599: step: 32/77, loss: 9.357833050671616e-07 2023-01-22 08:56:21.145845: step: 36/77, loss: 0.0037245291750878096 2023-01-22 08:56:22.447809: step: 40/77, loss: 3.310883403173648e-05 2023-01-22 08:56:23.747728: step: 44/77, loss: 5.7810040743788704e-05 2023-01-22 08:56:25.065100: step: 48/77, loss: 0.02020496316254139 2023-01-22 08:56:26.350003: step: 52/77, loss: 2.893852797569707e-05 2023-01-22 08:56:27.648117: step: 56/77, loss: 2.614892309793504e-06 2023-01-22 08:56:28.933476: step: 60/77, loss: 6.359125109156594e-05 2023-01-22 08:56:30.271737: step: 64/77, loss: 5.637254798784852e-05 2023-01-22 08:56:31.530905: step: 68/77, loss: 0.001058459049090743 2023-01-22 08:56:32.855001: step: 72/77, loss: 0.0012626081006601453 2023-01-22 08:56:34.159006: step: 76/77, loss: 1.6391270563076432e-08 2023-01-22 
08:56:35.456540: step: 80/77, loss: 3.704725168063305e-05 2023-01-22 08:56:36.778241: step: 84/77, loss: 1.2144018910476007e-06 2023-01-22 08:56:38.080639: step: 88/77, loss: 0.0022035983856767416 2023-01-22 08:56:39.386069: step: 92/77, loss: 3.0500468710670248e-06 2023-01-22 08:56:40.733008: step: 96/77, loss: 0.0008798028575256467 2023-01-22 08:56:42.009467: step: 100/77, loss: 0.0002495271619409323 2023-01-22 08:56:43.357698: step: 104/77, loss: 0.00026491464814171195 2023-01-22 08:56:44.627242: step: 108/77, loss: 0.0005632839747704566 2023-01-22 08:56:45.935177: step: 112/77, loss: 0.0002990629873238504 2023-01-22 08:56:47.198579: step: 116/77, loss: 0.02526215650141239 2023-01-22 08:56:48.506446: step: 120/77, loss: 0.0003323222335893661 2023-01-22 08:56:49.828336: step: 124/77, loss: 0.01125109102576971 2023-01-22 08:56:51.073301: step: 128/77, loss: 8.091188874459476e-07 2023-01-22 08:56:52.379298: step: 132/77, loss: 4.3213336908820565e-08 2023-01-22 08:56:53.680333: step: 136/77, loss: 0.034145377576351166 2023-01-22 08:56:54.976075: step: 140/77, loss: 3.913377440767363e-05 2023-01-22 08:56:56.277119: step: 144/77, loss: 0.00012151235569035634 2023-01-22 08:56:57.585164: step: 148/77, loss: 1.001347527562757e-06 2023-01-22 08:56:58.872393: step: 152/77, loss: 0.002642788225784898 2023-01-22 08:57:00.170325: step: 156/77, loss: 1.7642250895733014e-06 2023-01-22 08:57:01.478354: step: 160/77, loss: 1.8088079741573893e-05 2023-01-22 08:57:02.785042: step: 164/77, loss: 0.04336974024772644 2023-01-22 08:57:04.116095: step: 168/77, loss: 9.991742263082415e-05 2023-01-22 08:57:05.488998: step: 172/77, loss: 0.029939396306872368 2023-01-22 08:57:06.807298: step: 176/77, loss: 0.002172111766412854 2023-01-22 08:57:08.117706: step: 180/77, loss: 0.021324358880519867 2023-01-22 08:57:09.448431: step: 184/77, loss: 0.09488627314567566 2023-01-22 08:57:10.744074: step: 188/77, loss: 0.04166953265666962 2023-01-22 08:57:12.084759: step: 192/77, loss: 1.5445612007169984e-05 2023-01-22 08:57:13.464749: step: 196/77, loss: 0.0007762728491798043 2023-01-22 08:57:14.756925: step: 200/77, loss: 2.6306921427021734e-05 2023-01-22 08:57:16.060762: step: 204/77, loss: 2.7251383016846376e-06 2023-01-22 08:57:17.394491: step: 208/77, loss: 0.001086265780031681 2023-01-22 08:57:18.661296: step: 212/77, loss: 0.1006154865026474 2023-01-22 08:57:20.006148: step: 216/77, loss: 0.00856444425880909 2023-01-22 08:57:21.328876: step: 220/77, loss: 0.002317016711458564 2023-01-22 08:57:22.599045: step: 224/77, loss: 0.00038656831020489335 2023-01-22 08:57:23.906639: step: 228/77, loss: 3.667514829430729e-05 2023-01-22 08:57:25.193985: step: 232/77, loss: 0.00040591179276816547 2023-01-22 08:57:26.503073: step: 236/77, loss: 0.00016738964768592268 2023-01-22 08:57:27.744228: step: 240/77, loss: 6.972724804654717e-05 2023-01-22 08:57:29.073433: step: 244/77, loss: 0.00665863323956728 2023-01-22 08:57:30.436022: step: 248/77, loss: 0.013775674626231194 2023-01-22 08:57:31.756652: step: 252/77, loss: 0.003292257897555828 2023-01-22 08:57:33.044943: step: 256/77, loss: 3.42726487190248e-08 2023-01-22 08:57:34.365811: step: 260/77, loss: 0.14192891120910645 2023-01-22 08:57:35.671081: step: 264/77, loss: 5.758352926932275e-06 2023-01-22 08:57:37.027464: step: 268/77, loss: 6.13920974501525e-07 2023-01-22 08:57:38.367009: step: 272/77, loss: 0.0001108912329073064 2023-01-22 08:57:39.672170: step: 276/77, loss: 2.2955689928494394e-05 2023-01-22 08:57:40.950804: step: 280/77, loss: 6.733203190378845e-05 2023-01-22 
08:57:42.254163: step: 284/77, loss: 0.008186022751033306 2023-01-22 08:57:43.554486: step: 288/77, loss: 0.0001187180751003325 2023-01-22 08:57:44.854617: step: 292/77, loss: 1.2390642041282263e-05 2023-01-22 08:57:46.150793: step: 296/77, loss: 0.0025930129922926426 2023-01-22 08:57:47.433589: step: 300/77, loss: 0.007388572208583355 2023-01-22 08:57:48.766191: step: 304/77, loss: 0.0003645633696578443 2023-01-22 08:57:50.067317: step: 308/77, loss: 0.00046210913569666445 2023-01-22 08:57:51.314804: step: 312/77, loss: 4.128644650336355e-05 2023-01-22 08:57:52.642093: step: 316/77, loss: 0.0057842060923576355 2023-01-22 08:57:53.950576: step: 320/77, loss: 8.53521196404472e-05 2023-01-22 08:57:55.263758: step: 324/77, loss: 0.005444152280688286 2023-01-22 08:57:56.539432: step: 328/77, loss: 0.0006179081392474473 2023-01-22 08:57:57.847848: step: 332/77, loss: 0.0002588433271739632 2023-01-22 08:57:59.148463: step: 336/77, loss: 0.06355436146259308 2023-01-22 08:58:00.477293: step: 340/77, loss: 0.002731876913458109 2023-01-22 08:58:01.840962: step: 344/77, loss: 0.011247079819440842 2023-01-22 08:58:03.125911: step: 348/77, loss: 7.9117133282125e-05 2023-01-22 08:58:04.407462: step: 352/77, loss: 1.2233464985911269e-05 2023-01-22 08:58:05.665054: step: 356/77, loss: 1.663708644628059e-05 2023-01-22 08:58:06.962348: step: 360/77, loss: 0.00011308961984468624 2023-01-22 08:58:08.263938: step: 364/77, loss: 6.027103154337965e-05 2023-01-22 08:58:09.603011: step: 368/77, loss: 0.03185882419347763 2023-01-22 08:58:10.876939: step: 372/77, loss: 5.714062808692688e-06 2023-01-22 08:58:12.212746: step: 376/77, loss: 0.04134390130639076 2023-01-22 08:58:13.486257: step: 380/77, loss: 0.0028718234971165657 2023-01-22 08:58:14.797876: step: 384/77, loss: 8.937079655879643e-06 2023-01-22 08:58:16.086458: step: 388/77, loss: 0.0002531110367272049 ================================================== Loss: 0.009 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 22} Test Chinese: {'template': {'p': 0.9583333333333334, 'r': 0.5476190476190477, 'f1': 0.696969696969697}, 'slot': {'p': 0.6285714285714286, 'r': 0.018900343642611683, 'f1': 0.03669724770642201}, 'combined': 0.02557686961356686, 'epoch': 22} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Korean: {'template': {'p': 0.9452054794520548, 'r': 0.5476190476190477, 'f1': 0.6934673366834171}, 'slot': {'p': 0.5789473684210527, 'r': 0.018900343642611683, 'f1': 0.03660565723793677}, 'combined': 0.02538482763233806, 'epoch': 22} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 22} Test Russian: {'template': {'p': 0.9577464788732394, 'r': 0.5396825396825397, 'f1': 0.6903553299492384}, 'slot': {'p': 0.5428571428571428, 'r': 0.01632302405498282, 'f1': 0.0316930775646372}, 'combined': 0.021879485019241918, 'epoch': 22} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 22} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 
'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 22} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6333333333333333, 'r': 0.01632302405498282, 'f1': 0.031825795644891124}, 'combined': 0.02077051926298157, 'epoch': 21} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 23 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 08:59:57.773550: step: 4/77, loss: 0.025402506813406944 2023-01-22 08:59:59.048542: step: 8/77, loss: 1.4856110510663711e-06 2023-01-22 09:00:00.392453: step: 12/77, loss: 0.004074436612427235 2023-01-22 09:00:01.663348: step: 16/77, loss: 0.0014559680130332708 2023-01-22 09:00:03.034307: step: 20/77, loss: 0.00022927409736439586 2023-01-22 09:00:04.287212: step: 24/77, loss: 0.0029677823185920715 2023-01-22 09:00:05.619140: step: 28/77, loss: 0.0007572559406980872 2023-01-22 09:00:06.955707: step: 32/77, loss: 0.03823195397853851 2023-01-22 09:00:08.241094: step: 36/77, loss: 0.008422516286373138 2023-01-22 09:00:09.507105: step: 40/77, loss: 6.584785296581686e-05 2023-01-22 09:00:10.833670: step: 44/77, loss: 1.0373501027061138e-05 2023-01-22 09:00:12.135208: step: 48/77, loss: 0.008303694427013397 2023-01-22 09:00:13.428490: step: 52/77, loss: 0.0062025561928749084 2023-01-22 09:00:14.705740: step: 56/77, loss: 0.06150413677096367 2023-01-22 09:00:16.015444: step: 60/77, loss: 0.0009686066769063473 2023-01-22 09:00:17.292993: step: 64/77, loss: 2.03990521185915e-06 2023-01-22 09:00:18.639287: 
step: 68/77, loss: 6.0856546042487025e-05 2023-01-22 09:00:19.951978: step: 72/77, loss: 1.973300641111564e-05 2023-01-22 09:00:21.233532: step: 76/77, loss: 2.571762706793379e-06 2023-01-22 09:00:22.507593: step: 80/77, loss: 0.01592273637652397 2023-01-22 09:00:23.807369: step: 84/77, loss: 0.012238798663020134 2023-01-22 09:00:25.120166: step: 88/77, loss: 2.0712558068680664e-07 2023-01-22 09:00:26.419707: step: 92/77, loss: 0.02119985967874527 2023-01-22 09:00:27.691409: step: 96/77, loss: 3.281081262684893e-06 2023-01-22 09:00:28.990452: step: 100/77, loss: 8.314575552503811e-07 2023-01-22 09:00:30.303961: step: 104/77, loss: 0.008776322938501835 2023-01-22 09:00:31.546614: step: 108/77, loss: 0.00039585179183632135 2023-01-22 09:00:32.878930: step: 112/77, loss: 7.510131467824976e-07 2023-01-22 09:00:34.190660: step: 116/77, loss: 0.005819275509566069 2023-01-22 09:00:35.458430: step: 120/77, loss: 0.0008825076511129737 2023-01-22 09:00:36.818247: step: 124/77, loss: 0.0032741157338023186 2023-01-22 09:00:38.099720: step: 128/77, loss: 0.00010210295295109972 2023-01-22 09:00:39.444271: step: 132/77, loss: 0.03434651345014572 2023-01-22 09:00:40.789271: step: 136/77, loss: 6.918517465237528e-05 2023-01-22 09:00:42.087393: step: 140/77, loss: 0.002537723630666733 2023-01-22 09:00:43.411096: step: 144/77, loss: 0.004278878215700388 2023-01-22 09:00:44.693503: step: 148/77, loss: 0.03354043513536453 2023-01-22 09:00:45.972802: step: 152/77, loss: 0.00040915823774412274 2023-01-22 09:00:47.331221: step: 156/77, loss: 3.6666804135165876e-06 2023-01-22 09:00:48.644460: step: 160/77, loss: 1.1175860947787442e-07 2023-01-22 09:00:49.987465: step: 164/77, loss: 2.7298178792989347e-06 2023-01-22 09:00:51.333619: step: 168/77, loss: 0.013857110403478146 2023-01-22 09:00:52.660952: step: 172/77, loss: 1.5452174011443276e-06 2023-01-22 09:00:53.936053: step: 176/77, loss: 0.009256785735487938 2023-01-22 09:00:55.203263: step: 180/77, loss: 8.564258678234182e-06 2023-01-22 09:00:56.538140: step: 184/77, loss: 0.07777054607868195 2023-01-22 09:00:57.833284: step: 188/77, loss: 1.4129879673419055e-05 2023-01-22 09:00:59.097529: step: 192/77, loss: 0.0021475232206285 2023-01-22 09:01:00.398041: step: 196/77, loss: 7.18229387075553e-07 2023-01-22 09:01:01.725076: step: 200/77, loss: 0.017017874866724014 2023-01-22 09:01:03.015344: step: 204/77, loss: 0.0012315193889662623 2023-01-22 09:01:04.337104: step: 208/77, loss: 0.0026159523986279964 2023-01-22 09:01:05.680421: step: 212/77, loss: 1.3963597666588612e-05 2023-01-22 09:01:07.006625: step: 216/77, loss: 7.60200564400293e-05 2023-01-22 09:01:08.333212: step: 220/77, loss: 0.00015289208386093378 2023-01-22 09:01:09.610921: step: 224/77, loss: 1.761947714840062e-05 2023-01-22 09:01:10.910939: step: 228/77, loss: 9.572540147928521e-05 2023-01-22 09:01:12.255545: step: 232/77, loss: 0.3964194059371948 2023-01-22 09:01:13.553970: step: 236/77, loss: 9.985191718442366e-05 2023-01-22 09:01:14.892041: step: 240/77, loss: 0.11299334466457367 2023-01-22 09:01:16.195759: step: 244/77, loss: 1.639120199570243e-07 2023-01-22 09:01:17.481717: step: 248/77, loss: 0.001752063282765448 2023-01-22 09:01:18.774145: step: 252/77, loss: 0.0011091380147263408 2023-01-22 09:01:20.125321: step: 256/77, loss: 0.003398521803319454 2023-01-22 09:01:21.420419: step: 260/77, loss: 0.0009565073414705694 2023-01-22 09:01:22.746498: step: 264/77, loss: 0.00032057648058980703 2023-01-22 09:01:24.006407: step: 268/77, loss: 0.0014985166490077972 2023-01-22 09:01:25.304872: step: 
272/77, loss: 2.0800737274839776e-06 2023-01-22 09:01:26.612965: step: 276/77, loss: 0.004631507210433483 2023-01-22 09:01:27.951312: step: 280/77, loss: 2.6374616481916746e-06 2023-01-22 09:01:29.286426: step: 284/77, loss: 9.733112165122293e-06 2023-01-22 09:01:30.591011: step: 288/77, loss: 5.6874618167057633e-05 2023-01-22 09:01:31.899095: step: 292/77, loss: 7.215822552097961e-05 2023-01-22 09:01:33.263731: step: 296/77, loss: 0.0013572302414104342 2023-01-22 09:01:34.590452: step: 300/77, loss: 0.016739701852202415 2023-01-22 09:01:35.954492: step: 304/77, loss: 0.001509991241618991 2023-01-22 09:01:37.203236: step: 308/77, loss: 0.0048863752745091915 2023-01-22 09:01:38.495539: step: 312/77, loss: 0.011294625699520111 2023-01-22 09:01:39.788576: step: 316/77, loss: 0.00012890678772237152 2023-01-22 09:01:41.107635: step: 320/77, loss: 9.1008041636087e-06 2023-01-22 09:01:42.475199: step: 324/77, loss: 0.018667250871658325 2023-01-22 09:01:43.783299: step: 328/77, loss: 8.866010148267378e-07 2023-01-22 09:01:45.086628: step: 332/77, loss: 1.8640942016645567e-06 2023-01-22 09:01:46.403611: step: 336/77, loss: 1.7989448679145426e-05 2023-01-22 09:01:47.699929: step: 340/77, loss: 1.5984442143235356e-05 2023-01-22 09:01:49.038496: step: 344/77, loss: 2.678888813534286e-05 2023-01-22 09:01:50.302798: step: 348/77, loss: 0.003408107440918684 2023-01-22 09:01:51.589060: step: 352/77, loss: 9.685746249488147e-08 2023-01-22 09:01:52.898543: step: 356/77, loss: 1.2738772056763992e-05 2023-01-22 09:01:54.164081: step: 360/77, loss: 7.91678667155793e-06 2023-01-22 09:01:55.509963: step: 364/77, loss: 0.002775351284071803 2023-01-22 09:01:56.843945: step: 368/77, loss: 1.750870251271408e-05 2023-01-22 09:01:58.187037: step: 372/77, loss: 0.003882479388266802 2023-01-22 09:01:59.489228: step: 376/77, loss: 1.9464694560156204e-05 2023-01-22 09:02:00.809882: step: 380/77, loss: 0.00039333669701591134 2023-01-22 09:02:02.181816: step: 384/77, loss: 2.0658637367887422e-05 2023-01-22 09:02:03.520728: step: 388/77, loss: 1.4677294757348136e-06 ================================================== Loss: 0.010 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Chinese: {'template': {'p': 0.9705882352941176, 'r': 0.5238095238095238, 'f1': 0.6804123711340205}, 'slot': {'p': 0.6666666666666666, 'r': 0.020618556701030927, 'f1': 0.04}, 'combined': 0.02721649484536082, 'epoch': 23} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Korean: {'template': {'p': 0.9705882352941176, 'r': 0.5238095238095238, 'f1': 0.6804123711340205}, 'slot': {'p': 0.6571428571428571, 'r': 0.019759450171821305, 'f1': 0.038365304420350285}, 'combined': 0.026104227749929057, 'epoch': 23} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 23} Test Russian: {'template': {'p': 0.9710144927536232, 'r': 0.5317460317460317, 'f1': 0.6871794871794872}, 'slot': {'p': 0.6486486486486487, 'r': 0.020618556701030927, 'f1': 0.03996669442131557}, 'combined': 0.027464292576698905, 'epoch': 23} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 
0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 23} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 23} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6333333333333333, 'r': 0.01632302405498282, 'f1': 0.031825795644891124}, 'combined': 0.02077051926298157, 'epoch': 21} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 24 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:03:45.858507: step: 4/77, loss: 9.429901183466427e-06 2023-01-22 09:03:47.232510: step: 8/77, loss: 0.0015411779750138521 2023-01-22 09:03:48.547650: step: 12/77, loss: 0.002688502660021186 2023-01-22 09:03:49.816164: step: 16/77, loss: 9.294860501540825e-05 2023-01-22 09:03:51.152575: step: 20/77, loss: 0.006230478174984455 2023-01-22 09:03:52.438210: step: 24/77, loss: 0.0002460524847265333 2023-01-22 09:03:53.760671: step: 28/77, loss: 0.000616594566963613 2023-01-22 09:03:55.111183: step: 32/77, loss: 0.0003057969734072685 2023-01-22 09:03:56.444581: step: 36/77, loss: 5.055198926129378e-06 2023-01-22 09:03:57.737866: step: 40/77, loss: 0.03458188846707344 2023-01-22 09:03:59.057595: step: 44/77, loss: 7.703703204242629e-07 2023-01-22 09:04:00.378101: step: 48/77, loss: 0.04005847126245499 2023-01-22 09:04:01.673236: step: 52/77, loss: 0.00026904334663413465 2023-01-22 09:04:03.035893: step: 
56/77, loss: 5.124694143887609e-05 2023-01-22 09:04:04.386419: step: 60/77, loss: 6.460304575739428e-05 2023-01-22 09:04:05.662360: step: 64/77, loss: 0.00023380067432299256 2023-01-22 09:04:06.976587: step: 68/77, loss: 1.4930650422684266e-06 2023-01-22 09:04:08.250839: step: 72/77, loss: 0.0010681393323466182 2023-01-22 09:04:09.553953: step: 76/77, loss: 0.0012353757629171014 2023-01-22 09:04:10.792183: step: 80/77, loss: 0.003945427946746349 2023-01-22 09:04:12.050058: step: 84/77, loss: 2.697092895687092e-07 2023-01-22 09:04:13.359751: step: 88/77, loss: 2.790932012430858e-05 2023-01-22 09:04:14.695882: step: 92/77, loss: 0.00035998865496367216 2023-01-22 09:04:16.035455: step: 96/77, loss: 4.127090960537316e-06 2023-01-22 09:04:17.372552: step: 100/77, loss: 0.00021439642296172678 2023-01-22 09:04:18.681255: step: 104/77, loss: 1.5853891454753466e-05 2023-01-22 09:04:20.044163: step: 108/77, loss: 0.06753873825073242 2023-01-22 09:04:21.371596: step: 112/77, loss: 0.03835416957736015 2023-01-22 09:04:22.721878: step: 116/77, loss: 3.778714017244056e-06 2023-01-22 09:04:24.067762: step: 120/77, loss: 1.3321557162271347e-05 2023-01-22 09:04:25.360644: step: 124/77, loss: 3.7472609619726427e-06 2023-01-22 09:04:26.619774: step: 128/77, loss: 0.000342967250617221 2023-01-22 09:04:27.968675: step: 132/77, loss: 0.004019541200250387 2023-01-22 09:04:29.260870: step: 136/77, loss: 0.00020471542666200548 2023-01-22 09:04:30.576220: step: 140/77, loss: 2.682182582702808e-07 2023-01-22 09:04:31.927355: step: 144/77, loss: 0.002711113542318344 2023-01-22 09:04:33.230125: step: 148/77, loss: 8.044051355682313e-06 2023-01-22 09:04:34.534200: step: 152/77, loss: 8.048631571000442e-05 2023-01-22 09:04:35.851553: step: 156/77, loss: 0.0005561854341067374 2023-01-22 09:04:37.141309: step: 160/77, loss: 0.0027107931673526764 2023-01-22 09:04:38.450438: step: 164/77, loss: 4.043736043968238e-05 2023-01-22 09:04:39.775201: step: 168/77, loss: 0.0017298419261351228 2023-01-22 09:04:41.053770: step: 172/77, loss: 0.0002148420171579346 2023-01-22 09:04:42.369185: step: 176/77, loss: 0.00018987305520568043 2023-01-22 09:04:43.723295: step: 180/77, loss: 1.3187175227358239e-06 2023-01-22 09:04:45.065260: step: 184/77, loss: 0.002663626568391919 2023-01-22 09:04:46.349591: step: 188/77, loss: 0.0023871776647865772 2023-01-22 09:04:47.703587: step: 192/77, loss: 2.942380160675384e-05 2023-01-22 09:04:49.049481: step: 196/77, loss: 0.03128264844417572 2023-01-22 09:04:50.340321: step: 200/77, loss: 0.0009219897910952568 2023-01-22 09:04:51.669437: step: 204/77, loss: 0.0002257965534226969 2023-01-22 09:04:53.026440: step: 208/77, loss: 0.0021429036278277636 2023-01-22 09:04:54.318177: step: 212/77, loss: 3.0128776415949687e-06 2023-01-22 09:04:55.642786: step: 216/77, loss: 0.00012688209244515747 2023-01-22 09:04:56.952940: step: 220/77, loss: 0.0001109151853597723 2023-01-22 09:04:58.262763: step: 224/77, loss: 1.207463901664596e-05 2023-01-22 09:04:59.546712: step: 228/77, loss: 4.274935236026067e-06 2023-01-22 09:05:00.849757: step: 232/77, loss: 8.942193744587712e-06 2023-01-22 09:05:02.217493: step: 236/77, loss: 0.00014880349044688046 2023-01-22 09:05:03.575691: step: 240/77, loss: 4.067971701715578e-07 2023-01-22 09:05:04.929510: step: 244/77, loss: 1.275551494472893e-05 2023-01-22 09:05:06.229182: step: 248/77, loss: 7.123775503714569e-06 2023-01-22 09:05:07.545377: step: 252/77, loss: 0.03095679171383381 2023-01-22 09:05:08.848794: step: 256/77, loss: 3.175844540237449e-05 2023-01-22 09:05:10.208336: 
step: 260/77, loss: 1.7672155081527308e-06 2023-01-22 09:05:11.504625: step: 264/77, loss: 0.009948733262717724 2023-01-22 09:05:12.797660: step: 268/77, loss: 0.004043227061629295 2023-01-22 09:05:14.111190: step: 272/77, loss: 0.00035666185431182384 2023-01-22 09:05:15.422333: step: 276/77, loss: 4.450965570867993e-05 2023-01-22 09:05:16.741823: step: 280/77, loss: 0.00424011517316103 2023-01-22 09:05:18.094807: step: 284/77, loss: 2.2053633585983334e-07 2023-01-22 09:05:19.445810: step: 288/77, loss: 3.563959580787923e-06 2023-01-22 09:05:20.815248: step: 292/77, loss: 0.00020834298629779369 2023-01-22 09:05:22.140856: step: 296/77, loss: 4.0977698745336966e-07 2023-01-22 09:05:23.414604: step: 300/77, loss: 0.1088738888502121 2023-01-22 09:05:24.722083: step: 304/77, loss: 0.0008221596363000572 2023-01-22 09:05:26.024723: step: 308/77, loss: 1.533289605504251e-06 2023-01-22 09:05:27.306049: step: 312/77, loss: 0.00010860348993446678 2023-01-22 09:05:28.583708: step: 316/77, loss: 0.0003466178313829005 2023-01-22 09:05:29.871043: step: 320/77, loss: 2.8398906124493806e-06 2023-01-22 09:05:31.130388: step: 324/77, loss: 0.0002615092962514609 2023-01-22 09:05:32.406552: step: 328/77, loss: 3.981458121415926e-06 2023-01-22 09:05:33.688174: step: 332/77, loss: 9.101664545596577e-06 2023-01-22 09:05:34.985734: step: 336/77, loss: 3.844280854536919e-06 2023-01-22 09:05:36.301134: step: 340/77, loss: 4.366013399703661e-07 2023-01-22 09:05:37.670682: step: 344/77, loss: 1.9624180822575e-06 2023-01-22 09:05:39.009696: step: 348/77, loss: 0.00010418868623673916 2023-01-22 09:05:40.327658: step: 352/77, loss: 1.4759563782718033e-05 2023-01-22 09:05:41.646009: step: 356/77, loss: 2.355390097363852e-05 2023-01-22 09:05:42.953744: step: 360/77, loss: 1.1948211067647208e-05 2023-01-22 09:05:44.257397: step: 364/77, loss: 0.0030993595719337463 2023-01-22 09:05:45.567558: step: 368/77, loss: 4.706936579168541e-06 2023-01-22 09:05:46.847022: step: 372/77, loss: 9.344216778117698e-06 2023-01-22 09:05:48.143048: step: 376/77, loss: 2.4437713364022784e-07 2023-01-22 09:05:49.470008: step: 380/77, loss: 1.954923936864361e-05 2023-01-22 09:05:50.766571: step: 384/77, loss: 8.272416380350478e-06 2023-01-22 09:05:52.017219: step: 388/77, loss: 1.017559043248184e-05 ================================================== Loss: 0.004 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Chinese: {'template': {'p': 0.9855072463768116, 'r': 0.5396825396825397, 'f1': 0.6974358974358974}, 'slot': {'p': 0.6764705882352942, 'r': 0.019759450171821305, 'f1': 0.038397328881469114}, 'combined': 0.026779675527588715, 'epoch': 24} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Korean: {'template': {'p': 0.9714285714285714, 'r': 0.5396825396825397, 'f1': 0.6938775510204082}, 'slot': {'p': 0.6388888888888888, 'r': 0.019759450171821305, 'f1': 0.03833333333333333}, 'combined': 0.02659863945578231, 'epoch': 24} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 24} Test Russian: {'template': {'p': 0.9710144927536232, 'r': 0.5317460317460317, 
'f1': 0.6871794871794872}, 'slot': {'p': 0.6, 'r': 0.01804123711340206, 'f1': 0.035029190992493735}, 'combined': 0.024071341502534156, 'epoch': 24} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 24} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 24} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 24} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6333333333333333, 'r': 0.01632302405498282, 'f1': 0.031825795644891124}, 'combined': 0.02077051926298157, 'epoch': 21} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 25 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:07:34.171224: step: 4/77, loss: 1.0131721865036525e-05 2023-01-22 09:07:35.509602: step: 8/77, loss: 2.1794825443066657e-05 2023-01-22 09:07:36.785446: step: 12/77, loss: 0.003990492783486843 2023-01-22 09:07:38.067619: step: 16/77, loss: 2.2419793822336942e-05 2023-01-22 09:07:39.379447: step: 20/77, loss: 0.00011453872139099985 2023-01-22 09:07:40.699506: step: 24/77, loss: 0.0009024697938002646 2023-01-22 09:07:42.023484: step: 28/77, loss: 0.0009852326475083828 2023-01-22 09:07:43.344173: step: 32/77, loss: 4.193978020339273e-05 2023-01-22 09:07:44.675628: step: 36/77, loss: 9.90917442322825e-07 2023-01-22 09:07:45.971355: step: 
40/77, loss: 1.4200351188264904e-06 2023-01-22 09:07:47.250739: step: 44/77, loss: 1.7364271116093732e-05 2023-01-22 09:07:48.503325: step: 48/77, loss: 0.0018864780431613326 2023-01-22 09:07:49.795405: step: 52/77, loss: 1.4304744127002778e-06 2023-01-22 09:07:51.089638: step: 56/77, loss: 8.415815500484314e-06 2023-01-22 09:07:52.412518: step: 60/77, loss: 0.00016998105274979025 2023-01-22 09:07:53.724511: step: 64/77, loss: 2.388111170148477e-05 2023-01-22 09:07:54.998064: step: 68/77, loss: 3.4421458394717774e-07 2023-01-22 09:07:56.311477: step: 72/77, loss: 5.829046585859032e-06 2023-01-22 09:07:57.586540: step: 76/77, loss: 1.5549348972854204e-05 2023-01-22 09:07:58.870333: step: 80/77, loss: 4.696151154348627e-05 2023-01-22 09:08:00.185870: step: 84/77, loss: 0.00034041781327687204 2023-01-22 09:08:01.518676: step: 88/77, loss: 4.1603518184274435e-05 2023-01-22 09:08:02.851258: step: 92/77, loss: 1.6435512861789903e-06 2023-01-22 09:08:04.175859: step: 96/77, loss: 3.1080940971150994e-05 2023-01-22 09:08:05.509465: step: 100/77, loss: 0.002379967365413904 2023-01-22 09:08:06.817033: step: 104/77, loss: 6.448364729294553e-05 2023-01-22 09:08:08.140983: step: 108/77, loss: 2.125439641531557e-05 2023-01-22 09:08:09.425282: step: 112/77, loss: 0.1124541163444519 2023-01-22 09:08:10.714863: step: 116/77, loss: 2.987526841025101e-06 2023-01-22 09:08:11.993725: step: 120/77, loss: 4.3869185901712626e-05 2023-01-22 09:08:13.273399: step: 124/77, loss: 8.517784590367228e-05 2023-01-22 09:08:14.607383: step: 128/77, loss: 0.0001356957363896072 2023-01-22 09:08:15.948890: step: 132/77, loss: 6.780152489227476e-06 2023-01-22 09:08:17.282474: step: 136/77, loss: 0.00010379239392932504 2023-01-22 09:08:18.575497: step: 140/77, loss: 2.183364995289594e-05 2023-01-22 09:08:19.885264: step: 144/77, loss: 2.7572094040806405e-05 2023-01-22 09:08:21.159239: step: 148/77, loss: 4.101280137547292e-05 2023-01-22 09:08:22.502427: step: 152/77, loss: 5.097959365230054e-05 2023-01-22 09:08:23.814456: step: 156/77, loss: 1.3338012649910524e-05 2023-01-22 09:08:25.107819: step: 160/77, loss: 1.0117785222973907e-06 2023-01-22 09:08:26.423570: step: 164/77, loss: 0.03273176774382591 2023-01-22 09:08:27.711953: step: 168/77, loss: 0.003630247199907899 2023-01-22 09:08:29.010242: step: 172/77, loss: 2.6672981334741053e-07 2023-01-22 09:08:30.347849: step: 176/77, loss: 0.003499385202303529 2023-01-22 09:08:31.647814: step: 180/77, loss: 0.00023101118858903646 2023-01-22 09:08:33.000786: step: 184/77, loss: 0.006367899943143129 2023-01-22 09:08:34.323345: step: 188/77, loss: 0.011398566886782646 2023-01-22 09:08:35.655981: step: 192/77, loss: 2.0384045456012245e-06 2023-01-22 09:08:36.962942: step: 196/77, loss: 6.287941232585581e-06 2023-01-22 09:08:38.217643: step: 200/77, loss: 0.0001234139926964417 2023-01-22 09:08:39.501192: step: 204/77, loss: 0.034921444952487946 2023-01-22 09:08:40.762712: step: 208/77, loss: 5.707017862732755e-07 2023-01-22 09:08:42.048362: step: 212/77, loss: 1.282813445868669e-05 2023-01-22 09:08:43.347610: step: 216/77, loss: 7.301513278434868e-07 2023-01-22 09:08:44.707631: step: 220/77, loss: 0.029432687908411026 2023-01-22 09:08:45.976273: step: 224/77, loss: 3.7668391996703576e-06 2023-01-22 09:08:47.306009: step: 228/77, loss: 0.010214095935225487 2023-01-22 09:08:48.574574: step: 232/77, loss: 6.202467920957133e-05 2023-01-22 09:08:49.950161: step: 236/77, loss: 0.002276056446135044 2023-01-22 09:08:51.251049: step: 240/77, loss: 1.1458730568847386e-06 2023-01-22 09:08:52.571641: 
step: 244/77, loss: 1.3854609278496355e-05 2023-01-22 09:08:53.851111: step: 248/77, loss: 2.090712769131642e-05 2023-01-22 09:08:55.148751: step: 252/77, loss: 0.012720501981675625 2023-01-22 09:08:56.468789: step: 256/77, loss: 0.010555337183177471 2023-01-22 09:08:57.787604: step: 260/77, loss: 0.0005867365980520844 2023-01-22 09:08:59.129858: step: 264/77, loss: 6.630651569139445e-06 2023-01-22 09:09:00.461388: step: 268/77, loss: 2.6292429538443685e-05 2023-01-22 09:09:01.795580: step: 272/77, loss: 0.0002204696647822857 2023-01-22 09:09:03.096848: step: 276/77, loss: 2.4480957563355332e-06 2023-01-22 09:09:04.449825: step: 280/77, loss: 1.9012670691154199e-06 2023-01-22 09:09:05.729959: step: 284/77, loss: 4.039056420879206e-06 2023-01-22 09:09:07.037377: step: 288/77, loss: 5.880193202756345e-05 2023-01-22 09:09:08.314606: step: 292/77, loss: 2.8312197031254982e-08 2023-01-22 09:09:09.724776: step: 296/77, loss: 5.5917907957336865e-06 2023-01-22 09:09:11.032722: step: 300/77, loss: 1.966946570064465e-07 2023-01-22 09:09:12.337038: step: 304/77, loss: 3.297445118732867e-06 2023-01-22 09:09:13.636504: step: 308/77, loss: 7.820551218173932e-06 2023-01-22 09:09:14.956621: step: 312/77, loss: 1.2486799505495583e-06 2023-01-22 09:09:16.235639: step: 316/77, loss: 3.5506375297700288e-06 2023-01-22 09:09:17.560940: step: 320/77, loss: 2.61664463323541e-05 2023-01-22 09:09:18.856827: step: 324/77, loss: 6.271308848226909e-06 2023-01-22 09:09:20.193442: step: 328/77, loss: 9.149007382802665e-07 2023-01-22 09:09:21.543596: step: 332/77, loss: 0.0011777568142861128 2023-01-22 09:09:22.883595: step: 336/77, loss: 5.036566221860994e-07 2023-01-22 09:09:24.236994: step: 340/77, loss: 0.0036230036057531834 2023-01-22 09:09:25.611466: step: 344/77, loss: 0.010900352150201797 2023-01-22 09:09:26.870695: step: 348/77, loss: 0.001082171918824315 2023-01-22 09:09:28.141025: step: 352/77, loss: 2.9057139272481436e-07 2023-01-22 09:09:29.517451: step: 356/77, loss: 2.0563533098538755e-07 2023-01-22 09:09:30.794473: step: 360/77, loss: 1.3693969549422036e-06 2023-01-22 09:09:32.123551: step: 364/77, loss: 0.006990964524447918 2023-01-22 09:09:33.472135: step: 368/77, loss: 0.004334151744842529 2023-01-22 09:09:34.774199: step: 372/77, loss: 6.668072455795482e-05 2023-01-22 09:09:36.134112: step: 376/77, loss: 0.00010231477790512145 2023-01-22 09:09:37.466099: step: 380/77, loss: 6.392461727955379e-06 2023-01-22 09:09:38.795673: step: 384/77, loss: 1.6689237725131534e-07 2023-01-22 09:09:40.121815: step: 388/77, loss: 0.011087899096310139 ================================================== Loss: 0.003 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Chinese: {'template': {'p': 0.9848484848484849, 'r': 0.5158730158730159, 'f1': 0.6770833333333334}, 'slot': {'p': 0.6571428571428571, 'r': 0.019759450171821305, 'f1': 0.038365304420350285}, 'combined': 0.02597650820127884, 'epoch': 25} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Korean: {'template': {'p': 0.9848484848484849, 'r': 0.5158730158730159, 'f1': 0.6770833333333334}, 'slot': {'p': 0.6388888888888888, 'r': 0.019759450171821305, 'f1': 0.03833333333333333}, 'combined': 0.02595486111111111, 'epoch': 25} Dev 
Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 25} Test Russian: {'template': {'p': 0.9848484848484849, 'r': 0.5158730158730159, 'f1': 0.6770833333333334}, 'slot': {'p': 0.6176470588235294, 'r': 0.01804123711340206, 'f1': 0.035058430717863104}, 'combined': 0.023737479131886476, 'epoch': 25} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 25} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 25} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 25} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6333333333333333, 'r': 0.01632302405498282, 'f1': 0.031825795644891124}, 'combined': 0.02077051926298157, 'epoch': 21} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 26 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:11:21.904482: step: 4/77, loss: 0.0025950754061341286 2023-01-22 09:11:23.273467: step: 8/77, loss: 7.873056347307283e-06 2023-01-22 09:11:24.582124: step: 12/77, loss: 0.0008838302455842495 2023-01-22 09:11:25.869988: step: 16/77, loss: 0.0028116789180785418 2023-01-22 09:11:27.170632: step: 20/77, loss: 3.9636941551179916e-07 2023-01-22 
09:11:28.456364: step: 24/77, loss: 3.18428355967626e-05 2023-01-22 09:11:29.736215: step: 28/77, loss: 8.923166751628742e-05 2023-01-22 09:11:31.082908: step: 32/77, loss: 1.8863978766603395e-05 2023-01-22 09:11:32.380808: step: 36/77, loss: 0.004659530706703663 2023-01-22 09:11:33.671782: step: 40/77, loss: 9.939018354998552e-07 2023-01-22 09:11:34.944505: step: 44/77, loss: 0.0086531862616539 2023-01-22 09:11:36.284774: step: 48/77, loss: 0.000130360946059227 2023-01-22 09:11:37.538486: step: 52/77, loss: 0.001125822658650577 2023-01-22 09:11:38.875816: step: 56/77, loss: 8.256652654381469e-05 2023-01-22 09:11:40.165757: step: 60/77, loss: 3.7252627294037666e-07 2023-01-22 09:11:41.448546: step: 64/77, loss: 0.0012953771511092782 2023-01-22 09:11:42.756937: step: 68/77, loss: 2.5371762603754178e-05 2023-01-22 09:11:44.094840: step: 72/77, loss: 0.0005739459302276373 2023-01-22 09:11:45.360845: step: 76/77, loss: 0.0006838550325483084 2023-01-22 09:11:46.682180: step: 80/77, loss: 0.0941888764500618 2023-01-22 09:11:47.996492: step: 84/77, loss: 3.7338620586524485e-06 2023-01-22 09:11:49.323033: step: 88/77, loss: 0.00012075306585757062 2023-01-22 09:11:50.675551: step: 92/77, loss: 0.00017766923701856285 2023-01-22 09:11:51.986866: step: 96/77, loss: 3.5475077311275527e-06 2023-01-22 09:11:53.322308: step: 100/77, loss: 2.7978778234682977e-05 2023-01-22 09:11:54.634666: step: 104/77, loss: 0.00035309020313434303 2023-01-22 09:11:55.927578: step: 108/77, loss: 0.000111517590994481 2023-01-22 09:11:57.232671: step: 112/77, loss: 2.7566898097575177e-07 2023-01-22 09:11:58.534072: step: 116/77, loss: 0.002493783365935087 2023-01-22 09:11:59.841936: step: 120/77, loss: 2.5427108994335867e-05 2023-01-22 09:12:01.188137: step: 124/77, loss: 4.267096301191486e-06 2023-01-22 09:12:02.461975: step: 128/77, loss: 0.0004146482970099896 2023-01-22 09:12:03.740644: step: 132/77, loss: 0.0007785240886732936 2023-01-22 09:12:05.043084: step: 136/77, loss: 2.294766545674065e-07 2023-01-22 09:12:06.336525: step: 140/77, loss: 0.00033424387220293283 2023-01-22 09:12:07.605590: step: 144/77, loss: 0.0014177103294059634 2023-01-22 09:12:08.879821: step: 148/77, loss: 0.0001023170625558123 2023-01-22 09:12:10.195431: step: 152/77, loss: 0.0001036529429256916 2023-01-22 09:12:11.527365: step: 156/77, loss: 0.008335943333804607 2023-01-22 09:12:12.874629: step: 160/77, loss: 0.0015858053229749203 2023-01-22 09:12:14.135745: step: 164/77, loss: 0.0016299583949148655 2023-01-22 09:12:15.434396: step: 168/77, loss: 0.00011356234608683735 2023-01-22 09:12:16.734153: step: 172/77, loss: 0.00010723163723014295 2023-01-22 09:12:18.002200: step: 176/77, loss: 6.407495334315172e-08 2023-01-22 09:12:19.355337: step: 180/77, loss: 4.3071326217614114e-05 2023-01-22 09:12:20.739328: step: 184/77, loss: 2.104633858834859e-05 2023-01-22 09:12:22.042701: step: 188/77, loss: 2.9412876756396145e-06 2023-01-22 09:12:23.377728: step: 192/77, loss: 1.1890861060237512e-06 2023-01-22 09:12:24.701243: step: 196/77, loss: 5.061383490101434e-05 2023-01-22 09:12:25.986340: step: 200/77, loss: 0.002078942721709609 2023-01-22 09:12:27.295241: step: 204/77, loss: 9.075264642888214e-06 2023-01-22 09:12:28.596321: step: 208/77, loss: 0.1539250761270523 2023-01-22 09:12:29.891622: step: 212/77, loss: 5.708212484023534e-05 2023-01-22 09:12:31.191101: step: 216/77, loss: 1.096989944926463e-05 2023-01-22 09:12:32.540950: step: 220/77, loss: 0.00020207525813020766 2023-01-22 09:12:33.780197: step: 224/77, loss: 7.688929599680705e-07 2023-01-22 
09:12:35.051220: step: 228/77, loss: 0.0008981174323707819 2023-01-22 09:12:36.379904: step: 232/77, loss: 0.00035968711017630994 2023-01-22 09:12:37.668323: step: 236/77, loss: 1.2278413805688615e-06 2023-01-22 09:12:38.983103: step: 240/77, loss: 1.5705521718700766e-06 2023-01-22 09:12:40.294127: step: 244/77, loss: 1.9653625713544898e-06 2023-01-22 09:12:41.653239: step: 248/77, loss: 3.0335447718243813e-06 2023-01-22 09:12:43.019248: step: 252/77, loss: 3.3941450965357944e-06 2023-01-22 09:12:44.321707: step: 256/77, loss: 3.6431715670914855e-06 2023-01-22 09:12:45.665508: step: 260/77, loss: 1.8580437881610123e-06 2023-01-22 09:12:47.017043: step: 264/77, loss: 0.0007157810614444315 2023-01-22 09:12:48.338381: step: 268/77, loss: 0.0003576005110517144 2023-01-22 09:12:49.646062: step: 272/77, loss: 2.93079983748612e-06 2023-01-22 09:12:50.997252: step: 276/77, loss: 8.270012017419504e-07 2023-01-22 09:12:52.301263: step: 280/77, loss: 7.562554674223065e-05 2023-01-22 09:12:53.657156: step: 284/77, loss: 4.782815267390106e-06 2023-01-22 09:12:54.994946: step: 288/77, loss: 0.02490079402923584 2023-01-22 09:12:56.324414: step: 292/77, loss: 8.150669827955426e-07 2023-01-22 09:12:57.596862: step: 296/77, loss: 3.5999412375531392e-06 2023-01-22 09:12:58.941690: step: 300/77, loss: 4.378571247798391e-05 2023-01-22 09:13:00.261179: step: 304/77, loss: 0.00010184153506997973 2023-01-22 09:13:01.535199: step: 308/77, loss: 0.00016333050734829158 2023-01-22 09:13:02.895458: step: 312/77, loss: 0.00014269737584982067 2023-01-22 09:13:04.256762: step: 316/77, loss: 0.16972936689853668 2023-01-22 09:13:05.572009: step: 320/77, loss: 0.0005670539103448391 2023-01-22 09:13:06.858377: step: 324/77, loss: 0.003000626340508461 2023-01-22 09:13:08.193613: step: 328/77, loss: 0.00014308324898593128 2023-01-22 09:13:09.459591: step: 332/77, loss: 0.0004799831658601761 2023-01-22 09:13:10.746076: step: 336/77, loss: 0.035126760601997375 2023-01-22 09:13:12.095748: step: 340/77, loss: 1.7821416804508772e-06 2023-01-22 09:13:13.415757: step: 344/77, loss: 0.0005418871296569705 2023-01-22 09:13:14.772538: step: 348/77, loss: 0.00010020958143286407 2023-01-22 09:13:16.082759: step: 352/77, loss: 0.019325532019138336 2023-01-22 09:13:17.413004: step: 356/77, loss: 0.00025178532814607024 2023-01-22 09:13:18.684396: step: 360/77, loss: 1.534817783976905e-07 2023-01-22 09:13:20.007321: step: 364/77, loss: 0.0035209807101637125 2023-01-22 09:13:21.325205: step: 368/77, loss: 0.00038489673170261085 2023-01-22 09:13:22.633364: step: 372/77, loss: 0.02655796706676483 2023-01-22 09:13:23.943570: step: 376/77, loss: 0.00249669561162591 2023-01-22 09:13:25.254581: step: 380/77, loss: 1.6391267010362753e-08 2023-01-22 09:13:26.591583: step: 384/77, loss: 2.1010515638408833e-07 2023-01-22 09:13:27.921528: step: 388/77, loss: 7.659033371965052e-07 ================================================== Loss: 0.006 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 26} Test Chinese: {'template': {'p': 0.9848484848484849, 'r': 0.5158730158730159, 'f1': 0.6770833333333334}, 'slot': {'p': 0.6071428571428571, 'r': 0.014604810996563574, 'f1': 0.02852348993288591}, 'combined': 0.01931277964205817, 'epoch': 26} Dev Korean: {'template': {'p': 1.0, 'r': 0.5333333333333333, 'f1': 0.6956521739130436}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 
0.06371681415929203}, 'combined': 0.0443247402847249, 'epoch': 26} Test Korean: {'template': {'p': 0.9848484848484849, 'r': 0.5158730158730159, 'f1': 0.6770833333333334}, 'slot': {'p': 0.6206896551724138, 'r': 0.015463917525773196, 'f1': 0.03017602682313495}, 'combined': 0.02043168482816429, 'epoch': 26} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.04756215508903682, 'epoch': 26} Test Russian: {'template': {'p': 0.9850746268656716, 'r': 0.5238095238095238, 'f1': 0.6839378238341969}, 'slot': {'p': 0.5862068965517241, 'r': 0.014604810996563574, 'f1': 0.02849958088851635}, 'combined': 0.01949194133307854, 'epoch': 26} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 26} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 26} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6333333333333333, 'r': 0.01632302405498282, 'f1': 0.031825795644891124}, 'combined': 0.02077051926298157, 'epoch': 21} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 27 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:15:13.170141: step: 4/77, loss: 4.437868847162463e-05 2023-01-22 09:15:14.471587: step: 8/77, loss: 
0.005772527772933245 2023-01-22 09:15:15.815416: step: 12/77, loss: 1.5690127838752232e-05 2023-01-22 09:15:17.152016: step: 16/77, loss: 0.0006909964722581208 2023-01-22 09:15:18.530045: step: 20/77, loss: 0.06149635836482048 2023-01-22 09:15:19.805576: step: 24/77, loss: 0.005781150888651609 2023-01-22 09:15:21.099788: step: 28/77, loss: 3.4421262284922705e-07 2023-01-22 09:15:22.400246: step: 32/77, loss: 0.13293160498142242 2023-01-22 09:15:23.716971: step: 36/77, loss: 0.02070668153464794 2023-01-22 09:15:24.992151: step: 40/77, loss: 2.3008822608971968e-05 2023-01-22 09:15:26.281624: step: 44/77, loss: 7.152538472610104e-08 2023-01-22 09:15:27.621660: step: 48/77, loss: 0.0042272391729056835 2023-01-22 09:15:28.927395: step: 52/77, loss: 4.4107048324804055e-07 2023-01-22 09:15:30.274162: step: 56/77, loss: 1.2517935829237103e-05 2023-01-22 09:15:31.550636: step: 60/77, loss: 0.009791519492864609 2023-01-22 09:15:32.845711: step: 64/77, loss: 9.5928437076509e-05 2023-01-22 09:15:34.122154: step: 68/77, loss: 2.2761072614230216e-05 2023-01-22 09:15:35.431069: step: 72/77, loss: 0.0007864056387916207 2023-01-22 09:15:36.735760: step: 76/77, loss: 0.0062559861689805984 2023-01-22 09:15:38.070360: step: 80/77, loss: 0.01806912012398243 2023-01-22 09:15:39.357087: step: 84/77, loss: 0.00015917871496640146 2023-01-22 09:15:40.740164: step: 88/77, loss: 3.1738213692733552e-06 2023-01-22 09:15:42.024494: step: 92/77, loss: 0.0005329230916686356 2023-01-22 09:15:43.307087: step: 96/77, loss: 0.0007474389858543873 2023-01-22 09:15:44.604597: step: 100/77, loss: 0.00035668083000928164 2023-01-22 09:15:45.931319: step: 104/77, loss: 1.0937268370980746e-06 2023-01-22 09:15:47.233444: step: 108/77, loss: 1.0261518582410645e-05 2023-01-22 09:15:48.550386: step: 112/77, loss: 0.0004404282954055816 2023-01-22 09:15:49.838210: step: 116/77, loss: 9.961081559595186e-06 2023-01-22 09:15:51.167370: step: 120/77, loss: 3.652732993941754e-05 2023-01-22 09:15:52.510246: step: 124/77, loss: 9.344317368231714e-05 2023-01-22 09:15:53.767217: step: 128/77, loss: 3.968001237808494e-06 2023-01-22 09:15:55.128137: step: 132/77, loss: 7.186968286987394e-05 2023-01-22 09:15:56.416837: step: 136/77, loss: 0.00017383920203428715 2023-01-22 09:15:57.728046: step: 140/77, loss: 0.0001228848414029926 2023-01-22 09:15:59.020376: step: 144/77, loss: 0.001438188599422574 2023-01-22 09:16:00.294203: step: 148/77, loss: 3.014988760696724e-05 2023-01-22 09:16:01.640372: step: 152/77, loss: 1.8148712115362287e-06 2023-01-22 09:16:03.033985: step: 156/77, loss: 0.005394787061959505 2023-01-22 09:16:04.322628: step: 160/77, loss: 0.0022285841405391693 2023-01-22 09:16:05.623145: step: 164/77, loss: 4.7574019845342264e-05 2023-01-22 09:16:06.884577: step: 168/77, loss: 2.1606575728583266e-07 2023-01-22 09:16:08.222960: step: 172/77, loss: 0.0009196995524689555 2023-01-22 09:16:09.509503: step: 176/77, loss: 0.001570590422488749 2023-01-22 09:16:10.826021: step: 180/77, loss: 5.289816158438043e-07 2023-01-22 09:16:12.130142: step: 184/77, loss: 2.422596480755601e-05 2023-01-22 09:16:13.358587: step: 188/77, loss: 6.407492492144229e-08 2023-01-22 09:16:14.686548: step: 192/77, loss: 4.10639904657728e-06 2023-01-22 09:16:16.064211: step: 196/77, loss: 7.424390787491575e-06 2023-01-22 09:16:17.441841: step: 200/77, loss: 8.687628906045575e-06 2023-01-22 09:16:18.754518: step: 204/77, loss: 4.291505888431857e-07 2023-01-22 09:16:20.052371: step: 208/77, loss: 0.00012478455028031021 2023-01-22 09:16:21.333995: step: 212/77, loss: 
0.008036668412387371 2023-01-22 09:16:22.641610: step: 216/77, loss: 8.950755727710202e-05 2023-01-22 09:16:23.923197: step: 220/77, loss: 0.0023669220972806215 2023-01-22 09:16:25.239335: step: 224/77, loss: 1.0340979770262493e-06 2023-01-22 09:16:26.526308: step: 228/77, loss: 0.08163799345493317 2023-01-22 09:16:27.788513: step: 232/77, loss: 0.00021753522742073983 2023-01-22 09:16:29.172088: step: 236/77, loss: 9.374257388117258e-06 2023-01-22 09:16:30.535153: step: 240/77, loss: 0.00026272039394825697 2023-01-22 09:16:31.796920: step: 244/77, loss: 7.570067828055471e-05 2023-01-22 09:16:33.142689: step: 248/77, loss: 0.00039587743231095374 2023-01-22 09:16:34.447528: step: 252/77, loss: 0.0011752621503546834 2023-01-22 09:16:35.759722: step: 256/77, loss: 0.0003574644506443292 2023-01-22 09:16:37.126143: step: 260/77, loss: 1.4300309885584284e-05 2023-01-22 09:16:38.439223: step: 264/77, loss: 0.0002528098411858082 2023-01-22 09:16:39.742452: step: 268/77, loss: 0.0002186048513976857 2023-01-22 09:16:41.097296: step: 272/77, loss: 0.0008742262725718319 2023-01-22 09:16:42.448019: step: 276/77, loss: 0.01561832893639803 2023-01-22 09:16:43.812035: step: 280/77, loss: 1.1299137440801132e-05 2023-01-22 09:16:45.118551: step: 284/77, loss: 0.017644179984927177 2023-01-22 09:16:46.442310: step: 288/77, loss: 0.03774954751133919 2023-01-22 09:16:47.785886: step: 292/77, loss: 2.2633644221059512e-06 2023-01-22 09:16:49.109084: step: 296/77, loss: 0.05433797463774681 2023-01-22 09:16:50.424318: step: 300/77, loss: 5.3644168218625055e-08 2023-01-22 09:16:51.698542: step: 304/77, loss: 0.0002883031265810132 2023-01-22 09:16:53.021180: step: 308/77, loss: 0.00020126851450186223 2023-01-22 09:16:54.328564: step: 312/77, loss: 6.914063988006092e-07 2023-01-22 09:16:55.638562: step: 316/77, loss: 8.880998620952596e-07 2023-01-22 09:16:56.943372: step: 320/77, loss: 6.634901183133479e-06 2023-01-22 09:16:58.287848: step: 324/77, loss: 0.000134270143462345 2023-01-22 09:16:59.590538: step: 328/77, loss: 0.0009469084907323122 2023-01-22 09:17:00.931050: step: 332/77, loss: 2.5419665234949207e-06 2023-01-22 09:17:02.200920: step: 336/77, loss: 2.7366057111066766e-05 2023-01-22 09:17:03.528529: step: 340/77, loss: 0.00013289826165419072 2023-01-22 09:17:04.897088: step: 344/77, loss: 1.9423972844379023e-05 2023-01-22 09:17:06.195662: step: 348/77, loss: 0.004998302552849054 2023-01-22 09:17:07.478865: step: 352/77, loss: 7.904337508080062e-06 2023-01-22 09:17:08.820824: step: 356/77, loss: 0.0055083585903048515 2023-01-22 09:17:10.087950: step: 360/77, loss: 4.9315560318063945e-05 2023-01-22 09:17:11.402041: step: 364/77, loss: 1.777686293280567e-06 2023-01-22 09:17:12.683150: step: 368/77, loss: 4.1571020119590685e-06 2023-01-22 09:17:14.008633: step: 372/77, loss: 0.00030079163843765855 2023-01-22 09:17:15.338290: step: 376/77, loss: 8.303288268507458e-06 2023-01-22 09:17:16.654594: step: 380/77, loss: 0.00270866765640676 2023-01-22 09:17:17.983316: step: 384/77, loss: 8.264104690169916e-05 2023-01-22 09:17:19.305359: step: 388/77, loss: 0.0020972429774701595 ================================================== Loss: 0.005 -------------------- Dev Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 27} Test Chinese: {'template': {'p': 0.9577464788732394, 'r': 0.5396825396825397, 'f1': 0.6903553299492384}, 'slot': {'p': 0.6190476190476191, 
'r': 0.022336769759450172, 'f1': 0.04311774461028192}, 'combined': 0.029766564807098172, 'epoch': 27} Dev Korean: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 27} Test Korean: {'template': {'p': 0.9444444444444444, 'r': 0.5396825396825397, 'f1': 0.6868686868686867}, 'slot': {'p': 0.6190476190476191, 'r': 0.022336769759450172, 'f1': 0.04311774461028192}, 'combined': 0.02961622862120374, 'epoch': 27} Dev Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.4878048780487805, 'r': 0.03780718336483932, 'f1': 0.07017543859649122}, 'combined': 0.051708217913204055, 'epoch': 27} Test Russian: {'template': {'p': 0.9444444444444444, 'r': 0.5396825396825397, 'f1': 0.6868686868686867}, 'slot': {'p': 0.6190476190476191, 'r': 0.022336769759450172, 'f1': 0.04311774461028192}, 'combined': 0.02961622862120374, 'epoch': 27} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 27} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 27} Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 27} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6333333333333333, 'r': 0.01632302405498282, 'f1': 0.031825795644891124}, 'combined': 0.02077051926298157, 'epoch': 21} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 28 
command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:19:01.255616: step: 4/77, loss: 8.23704685899429e-05 2023-01-22 09:19:02.635570: step: 8/77, loss: 0.03628441318869591 2023-01-22 09:19:03.963046: step: 12/77, loss: 0.0022851990070194006 2023-01-22 09:19:05.270421: step: 16/77, loss: 1.4841228903605952e-06 2023-01-22 09:19:06.578635: step: 20/77, loss: 0.0012596538290381432 2023-01-22 09:19:07.861515: step: 24/77, loss: 0.0032123939599841833 2023-01-22 09:19:09.125461: step: 28/77, loss: 2.28930566663621e-05 2023-01-22 09:19:10.494785: step: 32/77, loss: 1.264566344616469e-05 2023-01-22 09:19:11.811874: step: 36/77, loss: 0.003975663799792528 2023-01-22 09:19:13.107346: step: 40/77, loss: 4.4445740059018135e-05 2023-01-22 09:19:14.455542: step: 44/77, loss: 0.00046154105802997947 2023-01-22 09:19:15.811546: step: 48/77, loss: 3.884429588651983e-06 2023-01-22 09:19:17.126755: step: 52/77, loss: 0.00010425122309243307 2023-01-22 09:19:18.450844: step: 56/77, loss: 1.2432614312274382e-05 2023-01-22 09:19:19.748804: step: 60/77, loss: 5.903623241465539e-05 2023-01-22 09:19:21.045054: step: 64/77, loss: 0.04321976378560066 2023-01-22 09:19:22.372353: step: 68/77, loss: 0.0008174843387678266 2023-01-22 09:19:23.654797: step: 72/77, loss: 0.06357888877391815 2023-01-22 09:19:24.922147: step: 76/77, loss: 7.065344561851816e-06 2023-01-22 09:19:26.260502: step: 80/77, loss: 5.1889550377381966e-05 2023-01-22 09:19:27.578702: step: 84/77, loss: 9.714143379824236e-05 2023-01-22 09:19:28.903397: step: 88/77, loss: 0.04682736471295357 2023-01-22 09:19:30.253307: step: 92/77, loss: 1.6703713754395721e-06 2023-01-22 09:19:31.565217: step: 96/77, loss: 0.0002513027866370976 2023-01-22 09:19:32.878829: step: 100/77, loss: 5.727513325837208e-06 2023-01-22 09:19:34.173880: step: 104/77, loss: 3.5202683648094535e-05 2023-01-22 09:19:35.495835: step: 108/77, loss: 5.169656651560217e-05 2023-01-22 09:19:36.847040: step: 112/77, loss: 2.1099415334902005e-06 2023-01-22 09:19:38.174302: step: 116/77, loss: 0.0002199725859099999 2023-01-22 09:19:39.520109: step: 120/77, loss: 3.053050022572279e-06 2023-01-22 09:19:40.843576: step: 124/77, loss: 4.4143645936856046e-05 2023-01-22 09:19:42.143676: step: 128/77, loss: 0.00018353867926634848 2023-01-22 09:19:43.470088: step: 132/77, loss: 0.002969453576952219 2023-01-22 09:19:44.785363: step: 136/77, loss: 0.0013011035043746233 2023-01-22 09:19:46.091398: step: 140/77, loss: 0.00010733507224358618 2023-01-22 09:19:47.363468: step: 144/77, loss: 5.396897904574871e-06 2023-01-22 09:19:48.711026: step: 148/77, loss: 5.68100149394013e-05 2023-01-22 09:19:50.043835: step: 152/77, loss: 5.4925600124988705e-05 2023-01-22 09:19:51.346188: step: 156/77, loss: 0.007555659394711256 2023-01-22 09:19:52.700349: step: 160/77, loss: 0.0002406742423772812 2023-01-22 09:19:53.965553: step: 164/77, loss: 5.572999270953005e-07 2023-01-22 09:19:55.253126: step: 168/77, loss: 1.873036808319739e-06 2023-01-22 09:19:56.601854: step: 172/77, loss: 0.006020023487508297 2023-01-22 09:19:57.887362: step: 176/77, loss: 0.0025768810883164406 2023-01-22 09:19:59.160974: step: 180/77, loss: 0.010427961125969887 2023-01-22 09:20:00.473083: step: 184/77, loss: 2.0492634575930424e-05 2023-01-22 09:20:01.792228: step: 188/77, loss: 2.3690738089499064e-05 2023-01-22 09:20:03.123971: step: 192/77, loss: 
2.4855458832462318e-05 2023-01-22 09:20:04.444689: step: 196/77, loss: 0.019199082627892494 2023-01-22 09:20:05.704582: step: 200/77, loss: 7.696493412368e-06 2023-01-22 09:20:06.991067: step: 204/77, loss: 0.00044914853060618043 2023-01-22 09:20:08.327528: step: 208/77, loss: 0.0013580780941992998 2023-01-22 09:20:09.652467: step: 212/77, loss: 0.00012708050780929625 2023-01-22 09:20:10.987950: step: 216/77, loss: 3.2884518077480607e-06 2023-01-22 09:20:12.259923: step: 220/77, loss: 5.277245691104326e-06 2023-01-22 09:20:13.535586: step: 224/77, loss: 0.00012476358097046614 2023-01-22 09:20:14.812148: step: 228/77, loss: 7.499421917600557e-05 2023-01-22 09:20:16.089187: step: 232/77, loss: 0.025630852207541466 2023-01-22 09:20:17.399273: step: 236/77, loss: 0.003062444506213069 2023-01-22 09:20:18.717716: step: 240/77, loss: 6.049780836292484e-07 2023-01-22 09:20:20.033253: step: 244/77, loss: 2.0234774638083763e-06 2023-01-22 09:20:21.339156: step: 248/77, loss: 1.480571063439129e-05 2023-01-22 09:20:22.669267: step: 252/77, loss: 8.42006120365113e-05 2023-01-22 09:20:23.952256: step: 256/77, loss: 0.00019058524048887193 2023-01-22 09:20:25.233921: step: 260/77, loss: 0.04819241166114807 2023-01-22 09:20:26.544868: step: 264/77, loss: 2.4820839826134034e-05 2023-01-22 09:20:27.915417: step: 268/77, loss: 0.03885664418339729 2023-01-22 09:20:29.241382: step: 272/77, loss: 4.239127520122565e-06 2023-01-22 09:20:30.565683: step: 276/77, loss: 1.46977827171213e-05 2023-01-22 09:20:31.923385: step: 280/77, loss: 8.833389438223094e-05 2023-01-22 09:20:33.270085: step: 284/77, loss: 1.5189934856607579e-05 2023-01-22 09:20:34.591186: step: 288/77, loss: 4.405963863973739e-06 2023-01-22 09:20:35.902701: step: 292/77, loss: 4.4703088519781886e-07 2023-01-22 09:20:37.247452: step: 296/77, loss: 4.758917839353671e-06 2023-01-22 09:20:38.506463: step: 300/77, loss: 3.8925314584048465e-05 2023-01-22 09:20:39.793550: step: 304/77, loss: 0.017981529235839844 2023-01-22 09:20:41.142297: step: 308/77, loss: 1.9454097127891146e-05 2023-01-22 09:20:42.474586: step: 312/77, loss: 2.0712566595193493e-07 2023-01-22 09:20:43.761224: step: 316/77, loss: 0.0007723932503722608 2023-01-22 09:20:45.038419: step: 320/77, loss: 1.5888794223428704e-05 2023-01-22 09:20:46.321527: step: 324/77, loss: 1.3279091035656165e-05 2023-01-22 09:20:47.621562: step: 328/77, loss: 0.0009476257837377489 2023-01-22 09:20:48.966814: step: 332/77, loss: 0.512925922870636 2023-01-22 09:20:50.312289: step: 336/77, loss: 9.29001034819521e-05 2023-01-22 09:20:51.622539: step: 340/77, loss: 1.1510443073348142e-05 2023-01-22 09:20:52.959957: step: 344/77, loss: 3.337846692375024e-07 2023-01-22 09:20:54.242699: step: 348/77, loss: 1.0945473150059115e-05 2023-01-22 09:20:55.550323: step: 352/77, loss: 0.02763889729976654 2023-01-22 09:20:56.897023: step: 356/77, loss: 0.0008513939683325589 2023-01-22 09:20:58.194853: step: 360/77, loss: 0.0003253959002904594 2023-01-22 09:20:59.490820: step: 364/77, loss: 1.0048305739474017e-05 2023-01-22 09:21:00.776657: step: 368/77, loss: 0.0001279129646718502 2023-01-22 09:21:02.121642: step: 372/77, loss: 4.5150295591156464e-07 2023-01-22 09:21:03.471900: step: 376/77, loss: 0.0006518355221487582 2023-01-22 09:21:04.821843: step: 380/77, loss: 0.025820119306445122 2023-01-22 09:21:06.166013: step: 384/77, loss: 2.8324179766059387e-06 2023-01-22 09:21:07.453422: step: 388/77, loss: 0.00029903562972322106 ================================================== Loss: 0.010 -------------------- Dev Chinese: 
{'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 28} Test Chinese: {'template': {'p': 0.9714285714285714, 'r': 0.5396825396825397, 'f1': 0.6938775510204082}, 'slot': {'p': 0.6785714285714286, 'r': 0.01632302405498282, 'f1': 0.03187919463087248}, 'combined': 0.02212025749897274, 'epoch': 28} Dev Korean: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 28} Test Korean: {'template': {'p': 0.971830985915493, 'r': 0.5476190476190477, 'f1': 0.7005076142131981}, 'slot': {'p': 0.6333333333333333, 'r': 0.01632302405498282, 'f1': 0.031825795644891124}, 'combined': 0.02229421217763947, 'epoch': 28} Dev Russian: {'template': {'p': 1.0, 'r': 0.55, 'f1': 0.7096774193548387}, 'slot': {'p': 0.5, 'r': 0.034026465028355386, 'f1': 0.06371681415929203}, 'combined': 0.04521838424207822, 'epoch': 28} Test Russian: {'template': {'p': 0.9710144927536232, 'r': 0.5317460317460317, 'f1': 0.6871794871794872}, 'slot': {'p': 0.6428571428571429, 'r': 0.015463917525773196, 'f1': 0.030201342281879193}, 'combined': 0.020753742901393906, 'epoch': 28} Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 28} Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} Sample Russian: {'template': {'p': 1.0, 'r': 0.25, 'f1': 0.4}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 28} ================================================== Current best result: -------------------- Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2} Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2} -------------------- Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21} Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6333333333333333, 'r': 0.01632302405498282, 'f1': 0.031825795644891124}, 'combined': 0.02077051926298157, 'epoch': 21} Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21} -------------------- Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2} Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 
'epoch': 2} Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2} ****************************** Epoch: 29 command: python train.py --model_name template --xlmr_model_name xlm-roberta-large --batch_size 10 --xlmr_learning_rate 2e-5 --event_hidden_num 450 --accumulate_step 4 --max_epoch 30 --p1_data_weight 0.1 --learning_rate 2e-4 2023-01-22 09:22:49.434549: step: 4/77, loss: 1.0125928383786231e-05 2023-01-22 09:22:50.775637: step: 8/77, loss: 0.0001004632722469978 2023-01-22 09:22:52.103943: step: 12/77, loss: 3.3080419825637364e-07 2023-01-22 09:22:53.406617: step: 16/77, loss: 0.05448224022984505 2023-01-22 09:22:54.717059: step: 20/77, loss: 1.299353243666701e-06 2023-01-22 09:22:55.936900: step: 24/77, loss: 0.0009496076963841915 2023-01-22 09:22:57.222881: step: 28/77, loss: 4.876495495409472e-06 2023-01-22 09:22:58.549174: step: 32/77, loss: 0.0002735431771725416 2023-01-22 09:22:59.855874: step: 36/77, loss: 0.02843218483030796 2023-01-22 09:23:01.191489: step: 40/77, loss: 0.03640910983085632 2023-01-22 09:23:02.511406: step: 44/77, loss: 0.0013255748199298978 2023-01-22 09:23:03.825824: step: 48/77, loss: 0.004339543171226978 2023-01-22 09:23:05.135768: step: 52/77, loss: 1.2665964277402963e-07 2023-01-22 09:23:06.449359: step: 56/77, loss: 1.5124039691727376e-06 2023-01-22 09:23:07.748558: step: 60/77, loss: 2.8548668069561245e-06 2023-01-22 09:23:09.081656: step: 64/77, loss: 2.2655663997284137e-05 2023-01-22 09:23:10.370610: step: 68/77, loss: 0.00011660682503134012 2023-01-22 09:23:11.698578: step: 72/77, loss: 5.665731350745773e-06 2023-01-22 09:23:13.023738: step: 76/77, loss: 1.2989067727176007e-05 2023-01-22 09:23:14.289783: step: 80/77, loss: 2.1367504814406857e-06 2023-01-22 09:23:15.572829: step: 84/77, loss: 0.0001403230126015842 2023-01-22 09:23:16.875287: step: 88/77, loss: 0.002301453612744808 2023-01-22 09:23:18.261752: step: 92/77, loss: 0.002005885588005185 2023-01-22 09:23:19.568056: step: 96/77, loss: 1.513984716439154e-05 2023-01-22 09:23:20.857966: step: 100/77, loss: 1.181620177703735e-06 2023-01-22 09:23:22.190467: step: 104/77, loss: 0.00012567572412081063 2023-01-22 09:23:23.526881: step: 108/77, loss: 0.009385241195559502 2023-01-22 09:23:24.874398: step: 112/77, loss: 3.427238084441342e-07 2023-01-22 09:23:26.181983: step: 116/77, loss: 6.252223101910204e-05 2023-01-22 09:23:27.478635: step: 120/77, loss: 0.00014491446199826896 2023-01-22 09:23:28.853283: step: 124/77, loss: 0.022527460008859634 2023-01-22 09:23:30.131017: step: 128/77, loss: 4.6917880354158115e-06 2023-01-22 09:23:31.391057: step: 132/77, loss: 6.645842631769483e-07 2023-01-22 09:23:32.706263: step: 136/77, loss: 0.016113124787807465 2023-01-22 09:23:34.000921: step: 140/77, loss: 0.00011516553058754653 2023-01-22 09:23:35.279862: step: 144/77, loss: 3.055720662814565e-05 2023-01-22 09:23:36.573170: step: 148/77, loss: 5.319628826327971e-07 2023-01-22 09:23:37.871671: step: 152/77, loss: 1.8640930647961795e-06 2023-01-22 09:23:39.188461: step: 156/77, loss: 0.0004583533154800534 2023-01-22 09:23:40.520323: step: 160/77, loss: 2.622595616230683e-07 2023-01-22 09:23:41.847026: step: 164/77, loss: 0.00019146154227200896 2023-01-22 09:23:43.092118: step: 168/77, loss: 5.960464122267695e-09 2023-01-22 09:23:44.473675: step: 172/77, loss: 9.83475558768987e-08 2023-01-22 09:23:45.805667: step: 176/77, loss: 0.001004547462798655 2023-01-22 09:23:47.063957: step: 180/77, loss: 3.3479436751804315e-06 2023-01-22 
09:23:48.390965: step: 184/77, loss: 3.759380433621118e-06 2023-01-22 09:23:49.708030: step: 188/77, loss: 0.00048191455425694585 2023-01-22 09:23:51.087433: step: 192/77, loss: 6.87026113155298e-05 2023-01-22 09:23:52.387593: step: 196/77, loss: 3.829567276625312e-07 2023-01-22 09:23:53.666295: step: 200/77, loss: 0.017844675108790398 2023-01-22 09:23:55.034869: step: 204/77, loss: 1.385804893061504e-07 2023-01-22 09:23:56.379843: step: 208/77, loss: 4.617567356035579e-06 2023-01-22 09:23:57.664860: step: 212/77, loss: 0.001500986167229712 2023-01-22 09:23:58.952478: step: 216/77, loss: 1.2352752492006402e-06 2023-01-22 09:24:00.251252: step: 220/77, loss: 8.01673536443559e-07 2023-01-22 09:24:01.581503: step: 224/77, loss: 7.582730904687196e-05 2023-01-22 09:24:02.923634: step: 228/77, loss: 0.0006666135741397738 2023-01-22 09:24:04.236565: step: 232/77, loss: 2.8460976864153054e-07 2023-01-22 09:24:05.596211: step: 236/77, loss: 1.0624310107232304e-06 2023-01-22 09:24:06.917053: step: 240/77, loss: 1.4111069503996987e-06 2023-01-22 09:24:08.217181: step: 244/77, loss: 6.237941306608263e-06 2023-01-22 09:24:09.546902: step: 248/77, loss: 5.6616836445755325e-06 2023-01-22 09:24:10.856884: step: 252/77, loss: 0.001264326274394989 2023-01-22 09:24:12.186018: step: 256/77, loss: 2.1888276933168527e-06 2023-01-22 09:24:13.518828: step: 260/77, loss: 5.51342402843602e-08 2023-01-22 09:24:14.814136: step: 264/77, loss: 8.299799105770944e-07 2023-01-22 09:24:16.075839: step: 268/77, loss: 0.0027380480896681547 2023-01-22 09:24:17.386829: step: 272/77, loss: 0.02482440136373043 2023-01-22 09:24:18.715743: step: 276/77, loss: 4.0913464545155875e-06 2023-01-22 09:24:20.052181: step: 280/77, loss: 0.00029373884899541736 2023-01-22 09:24:21.367870: step: 284/77, loss: 0.0003284189442638308 2023-01-22 09:24:22.670032: step: 288/77, loss: 1.0624784408719279e-05 2023-01-22 09:24:23.979387: step: 292/77, loss: 7.0821461122250184e-06 2023-01-22 09:24:25.234862: step: 296/77, loss: 9.566343806000077e-07 2023-01-22 09:24:26.558234: step: 300/77, loss: 3.900290175806731e-05 2023-01-22 09:24:27.892948: step: 304/77, loss: 0.0006133618298918009 2023-01-22 09:24:29.154648: step: 308/77, loss: 1.862150566012133e-05 2023-01-22 09:24:30.490615: step: 312/77, loss: 4.497852387430612e-06 2023-01-22 09:24:31.812311: step: 316/77, loss: 3.9124729482864495e-06 2023-01-22 09:24:33.136428: step: 320/77, loss: 0.03277048468589783 2023-01-22 09:24:34.433562: step: 324/77, loss: 1.156323924078606e-05 2023-01-22 09:24:35.757848: step: 328/77, loss: 0.00013471973943524063 2023-01-22 09:24:37.068036: step: 332/77, loss: 8.426280692219734e-05 2023-01-22 09:24:38.384743: step: 336/77, loss: 3.1888390594758675e-07 2023-01-22 09:24:39.745068: step: 340/77, loss: 4.12757827916721e-07 2023-01-22 09:24:41.041489: step: 344/77, loss: 0.03155367448925972 2023-01-22 09:24:42.336067: step: 348/77, loss: 2.0414552182046464e-07 2023-01-22 09:24:43.629218: step: 352/77, loss: 8.952095413405914e-06 2023-01-22 09:24:44.971463: step: 356/77, loss: 7.450580152834618e-09 2023-01-22 09:24:46.284932: step: 360/77, loss: 0.02338743396103382 2023-01-22 09:24:47.603907: step: 364/77, loss: 0.0003410083882045001 2023-01-22 09:24:48.990221: step: 368/77, loss: 2.2649727782209084e-07 2023-01-22 09:24:50.263118: step: 372/77, loss: 3.186542016919702e-05 2023-01-22 09:24:51.580210: step: 376/77, loss: 1.3230645890871529e-05 2023-01-22 09:24:52.861393: step: 380/77, loss: 9.23870828728468e-08 2023-01-22 09:24:54.174607: step: 384/77, loss: 
2023-01-22 09:24:55.510951: step: 388/77, loss: 8.806327400634473e-07
==================================================
Loss: 0.003
--------------------
Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 29}
Test Chinese: {'template': {'p': 0.9701492537313433, 'r': 0.5158730158730159, 'f1': 0.6735751295336788}, 'slot': {'p': 0.6666666666666666, 'r': 0.020618556701030927, 'f1': 0.04}, 'combined': 0.02694300518134715, 'epoch': 29}
Dev Korean: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 29}
Test Korean: {'template': {'p': 0.9565217391304348, 'r': 0.5238095238095238, 'f1': 0.6769230769230768}, 'slot': {'p': 0.631578947368421, 'r': 0.020618556701030927, 'f1': 0.03993344425956739}, 'combined': 0.027031869960322537, 'epoch': 29}
Dev Russian: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, 'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, 'combined': 0.048482119404105226, 'epoch': 29}
Test Russian: {'template': {'p': 0.9565217391304348, 'r': 0.5238095238095238, 'f1': 0.6769230769230768}, 'slot': {'p': 0.6052631578947368, 'r': 0.019759450171821305, 'f1': 0.038269550748752074}, 'combined': 0.025905542045309093, 'epoch': 29}
Sample Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 29}
Sample Korean: {'template': {'p': 0.5, 'r': 0.5, 'f1': 0.5}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 29}
Sample Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.034482758620689655, 'f1': 0.06451612903225806}, 'combined': 0.04301075268817204, 'epoch': 29}
==================================================
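Note on the step lines and the epoch-level loss above: with --accumulate_step 4 the step counter advances by four mini-batches per logged update, and "Loss: 0.003" is consistent with the mean of the logged step losses for the epoch. The following is a minimal gradient-accumulation sketch under those assumptions; model, optimizer, and loader are placeholders rather than the actual train.py objects, and the "/77" denominator printed in the real log is not reproduced.

```python
import datetime

ACCUMULATE_STEP = 4  # matches --accumulate_step 4 on the command line

def train_one_epoch(model, optimizer, loader):
    """Log one line per optimizer update, then report the mean of the step losses."""
    step_losses = []
    optimizer.zero_grad()
    for i, batch in enumerate(loader, start=1):
        loss = model(batch)                   # placeholder: forward pass returning a scalar loss
        (loss / ACCUMULATE_STEP).backward()   # scale so the accumulated gradients average out
        step_losses.append(loss.item())
        if i % ACCUMULATE_STEP == 0:
            optimizer.step()
            optimizer.zero_grad()
            # the real log prints "/77" here; this sketch simply uses the batch count
            print(f"{datetime.datetime.now()}: step: {i}/{len(loader)}, loss: {loss.item()}")
    print("=" * 50)
    print(f"Loss: {sum(step_losses) / len(step_losses):.3f}")  # e.g. 0.003 for epoch 29
```

The scoring dictionaries follow a consistent pattern: each 'f1' is the harmonic mean of its 'p' and 'r', and 'combined' equals the product of the template F1 and the slot F1. The small sketch below reproduces the Dev Chinese numbers for epoch 29; it mirrors the logged values, though the scorer inside train.py may be implemented differently.

```python
def f1(p, r):
    """Harmonic mean of precision and recall; 0.0 when both are zero."""
    return 2 * p * r / (p + r) if (p + r) > 0 else 0.0

# Dev Chinese, epoch 29 -- precision/recall values copied from the log above.
template_f1 = f1(1.0, 0.5666666666666667)    # 0.7234042553191489
slot_f1 = f1(0.5, 0.035916824196597356)      # 0.0670194003527337
combined = template_f1 * slot_f1             # 0.048482119404105226
print(template_f1, slot_f1, combined)
```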
Current best result:
--------------------
Dev for Chinese: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Chinese: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2}
Chinese: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.5, 'r': 0.02857142857142857, 'f1': 0.05405405405405405}, 'combined': 0.03603603603603603, 'epoch': 2}
--------------------
Dev for Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.4857142857142857, 'r': 0.03213610586011342, 'f1': 0.06028368794326241}, 'combined': 0.04018912529550827, 'epoch': 21}
Test for Korean: {'template': {'p': 0.96875, 'r': 0.49206349206349204, 'f1': 0.6526315789473683}, 'slot': {'p': 0.6333333333333333, 'r': 0.01632302405498282, 'f1': 0.031825795644891124}, 'combined': 0.02077051926298157, 'epoch': 21}
Korean: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 21}
--------------------
Dev for Russian: {'template': {'p': 1.0, 'r': 0.5833333333333334, 'f1': 0.7368421052631579}, 'slot': {'p': 0.5, 'r': 0.03780718336483932, 'f1': 0.07029876977152899}, 'combined': 0.05179909351586346, 'epoch': 2}
Test for Russian: {'template': {'p': 0.9552238805970149, 'r': 0.5079365079365079, 'f1': 0.6632124352331605}, 'slot': {'p': 0.6896551724137931, 'r': 0.01718213058419244, 'f1': 0.03352891869237217}, 'combined': 0.022236795816702785, 'epoch': 2}
Russian: {'template': {'p': 1.0, 'r': 0.5, 'f1': 0.6666666666666666}, 'slot': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'combined': 0.0, 'epoch': 2}
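Because the per-language result lines above (including the "Current best result" block) contain valid Python dict literals, they can be parsed back into structured data for comparison across epochs. Below is a sketch using only the standard library; parse_result_line is a hypothetical helper for working with this log, not part of train.py.

```python
import ast

def parse_result_line(line):
    """Split a line like "Dev Chinese: {...}" into its label and score dictionary.
    The score dictionaries in this log are valid Python literals."""
    label, literal = line.split(": ", 1)
    return label, ast.literal_eval(literal)

label, scores = parse_result_line(
    "Dev Chinese: {'template': {'p': 1.0, 'r': 0.5666666666666667, 'f1': 0.7234042553191489}, "
    "'slot': {'p': 0.5, 'r': 0.035916824196597356, 'f1': 0.0670194003527337}, "
    "'combined': 0.048482119404105226, 'epoch': 29}"
)
print(label, scores['combined'])  # Dev Chinese 0.048482119404105226
```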