Comments (8)
My transformers library is version 4.25.1.
Hello, I'm a graduate student at Donghua University. It's an honor to read such an excellent paper and to try reproducing it. I've run into some problems while running your example code:
I hope you can spare some time to take a look. Many thanks!
It's a transformers library issue; just edit the `__init__.py` file under /LiLT/LiLTfinetune:
```python
from collections import OrderedDict
import types

from transformers import CONFIG_MAPPING, MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, MODEL_NAMES_MAPPING, TOKENIZER_MAPPING
from transformers.convert_slow_tokenizer import SLOW_TO_FAST_CONVERTERS, BertConverter, RobertaConverter, XLMRobertaConverter

# transformers <= 4.5.x exposes auto_class_factory; newer releases replaced it
# with _BaseAutoModelClass + auto_class_update.
try:
    from transformers.models.auto.modeling_auto import auto_class_factory
except ImportError:
    from transformers.models.auto.modeling_auto import _BaseAutoModelClass, auto_class_update

from .models.LiLTRobertaLike import (
    LiLTRobertaLikeConfig,
    LiLTRobertaLikeForRelationExtraction,
    LiLTRobertaLikeForTokenClassification,
    LiLTRobertaLikeTokenizer,
    LiLTRobertaLikeTokenizerFast,
)

CONFIG_MAPPING.update([("liltrobertalike", LiLTRobertaLikeConfig)])
MODEL_NAMES_MAPPING.update([("liltrobertalike", "LiLTRobertaLike")])
TOKENIZER_MAPPING.update(
    [(LiLTRobertaLikeConfig, (LiLTRobertaLikeTokenizer, LiLTRobertaLikeTokenizerFast))]
)

with open('tag.txt', 'r') as tagf:
    TAG = tagf.read().lower()
assert TAG == 'monolingual' or TAG == 'multilingual', 'TAG is wrong. It should be monolingual or multilingual.'
if TAG == 'monolingual':
    SLOW_TO_FAST_CONVERTERS.update({"LiLTRobertaLikeTokenizer": RobertaConverter})
elif TAG == 'multilingual':
    SLOW_TO_FAST_CONVERTERS.update({"LiLTRobertaLikeTokenizer": XLMRobertaConverter})

MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.update(
    [(LiLTRobertaLikeConfig, LiLTRobertaLikeForTokenClassification)]
)
MODEL_FOR_RELATION_EXTRACTION_MAPPING = OrderedDict(
    [(LiLTRobertaLikeConfig, LiLTRobertaLikeForRelationExtraction)]
)

# On old transformers, build the auto classes with auto_class_factory; on new
# transformers that name does not exist, so fall back to auto_class_update.
try:
    AutoModelForTokenClassification = auto_class_factory(
        "AutoModelForTokenClassification", MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, head_doc="token classification")
except NameError:
    cls = types.new_class("AutoModelForTokenClassification", (_BaseAutoModelClass,))
    cls._model_mapping = MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
    cls.__name__ = "AutoModelForTokenClassification"
    AutoModelForTokenClassification = auto_class_update(cls, head_doc="token classification")

try:
    AutoModelForRelationExtraction = auto_class_factory(
        "AutoModelForRelationExtraction", MODEL_FOR_RELATION_EXTRACTION_MAPPING, head_doc="relation extraction")
except NameError:
    cls = types.new_class("AutoModelForRelationExtraction", (_BaseAutoModelClass,))
    # Use the relation-extraction mapping here, not the token-classification one.
    cls._model_mapping = MODEL_FOR_RELATION_EXTRACTION_MAPPING
    cls.__name__ = "AutoModelForRelationExtraction"
    AutoModelForRelationExtraction = auto_class_update(cls, head_doc="relation extraction")

# Original single-call versions, kept for reference:
# AutoModelForTokenClassification = auto_class_factory(
#     "AutoModelForTokenClassification", MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, head_doc="token classification"
# )
# AutoModelForRelationExtraction = auto_class_factory(
#     "AutoModelForRelationExtraction", MODEL_FOR_RELATION_EXTRACTION_MAPPING, head_doc="relation extraction"
# )
```
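As a quick sanity check after patching (a minimal sketch: it assumes the package is installed with `pip install -e .` so that `LiLTfinetune` is importable, and that `tag.txt` exists in the working directory, since the file above reads it):

```python
# Hedged sanity check: importing the patched package runs its __init__.py,
# which should register the custom config with transformers' auto mappings.
from transformers import CONFIG_MAPPING

import LiLTfinetune  # noqa: F401 -- executes the patched __init__.py

# Should print the custom config class instead of raising KeyError.
print(CONFIG_MAPPING["liltrobertalike"])
```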
Lines 19 and 20 (the `CONFIG_MAPPING` and `MODEL_NAMES_MAPPING` updates) are there to extend the definitions inside transformers, because transformers==4.5.1 did not ship a LiLT model yet:

```python
CONFIG_MAPPING.update([("liltrobertalike", LiLTRobertaLikeConfig), ("lilt", LiLTRobertaLikeConfig)])
MODEL_NAMES_MAPPING.update([("liltrobertalike", "LiLTRobertaLike"), ("lilt", "LiLTRobertaLike")])
```

Also, the OP's original problem was caused by the version of the packaging package; downgrading with `pip install packaging==21.0` fixes it.
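On recent transformers releases there is also a supported registration API, which avoids mutating `CONFIG_MAPPING` in place; a hedged sketch (the import path assumes the repo's package layout):

```python
from transformers import AutoConfig

from LiLTfinetune.models.LiLTRobertaLike import LiLTRobertaLikeConfig

# Register the custom model type through the public API. Note that only
# "liltrobertalike" can be registered this way: AutoConfig.register requires
# the key to match the config class's own model_type attribute.
AutoConfig.register("liltrobertalike", LiLTRobertaLikeConfig)
```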
Looks like some library version mismatch.
Check your current environment configuration and reinstall the conda environment following the requirements in the README.
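For example, a minimal snippet to see what the active environment actually contains:

```python
# Print the versions this thread hinges on.
import packaging
import torch
import transformers

print("transformers:", transformers.__version__)
print("torch:", torch.__version__)
print("packaging:", packaging.__version__)
```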
I installed it exactly following the steps in the README, and ran into the same problem.
After modifying the `__init__.py` file and updating the transformers library to 4.25.1, I reran the shell script for Semantic Entity Recognition on FUNSD:
```bash
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch --nproc_per_node=4 examples/run_funsd.py \
    --model_name_or_path lilt-roberta-en-base \
    --tokenizer_name roberta-base \
    --output_dir ser_funsd_lilt-roberta-en-base \
    --do_train \
    --do_predict \
    --max_steps 2000 \
    --per_device_train_batch_size 8 \
    --warmup_ratio 0.1 \
    --fp16
```
It fails with the error below; a KeyError seems to abort the run.
```
*****************************************
Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
WARNING:__main__:Process rank: 0, device: cuda:0, n_gpu: 1distributed training: True, 16-bits training: True
INFO:__main__:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=False,
do_predict=True,
do_train=True,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=None,
evaluation_strategy=no,
fp16=True,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
gradient_accumulation_steps=1,
gradient_checkpointing=False,
greater_is_better=None,
group_by_length=False,
half_precision_backend=auto,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=every_save,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
include_inputs_for_metrics=False,
jit_mode_eval=False,
label_names=None,
label_smoothing_factor=0.0,
learning_rate=5e-05,
length_column_name=length,
load_best_model_at_end=False,
local_rank=0,
log_level=passive,
log_level_replica=passive,
log_on_each_node=True,
logging_dir=ser_funsd_lilt-roberta-en-base/runs/Feb20_16-07-29_dgx,
logging_first_step=False,
logging_nan_inf_filter=True,
logging_steps=500,
logging_strategy=steps,
lr_scheduler_type=linear,
max_grad_norm=1.0,
max_steps=2000,
metric_for_best_model=None,
mp_parameters=,
no_cuda=False,
num_train_epochs=3.0,
optim=adamw_hf,
optim_args=None,
output_dir=ser_funsd_lilt-roberta-en-base,
overwrite_output_dir=False,
past_index=-1,
per_device_eval_batch_size=8,
per_device_train_batch_size=8,
prediction_loss_only=False,
push_to_hub=False,
push_to_hub_model_id=None,
push_to_hub_organization=None,
push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
ray_scope=last,
remove_unused_columns=True,
report_to=['tensorboard'],
resume_from_checkpoint=None,
run_name=ser_funsd_lilt-roberta-en-base,
save_on_each_node=False,
save_steps=500,
save_strategy=steps,
save_total_limit=None,
seed=42,
sharded_ddp=[],
skip_memory_metrics=True,
tf32=None,
torchdynamo=None,
tpu_metrics_debug=False,
tpu_num_cores=None,
use_ipex=False,
use_legacy_prediction_loop=False,
use_mps_device=False,
warmup_ratio=0.1,
warmup_steps=0,
weight_decay=0.0,
xpu_backend=None,
)
Downloading and preparing dataset funsd/funsd (download: Unknown size, generated: Unknown size, post-processed: Unknown size, total: Unknown size) to /home/pm-gaojun/.cache/huggingface/datasets/funsd/funsd/1.0.0/f64b3b868c231da8090a3d527c1376c082b0afb5873ddb1e65e9f1005c919106...
WARNING:__main__:Process rank: 2, device: cuda:2, n_gpu: 1distributed training: True, 16-bits training: True
WARNING:__main__:Process rank: 1, device: cuda:1, n_gpu: 1distributed training: True, 16-bits training: True
WARNING:__main__:Process rank: 3, device: cuda:3, n_gpu: 1distributed training: True, 16-bits training: True
Downloading: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16.8M/16.8M [09:12<00:00, 30.5kB/s]
Dataset funsd downloaded and prepared to /home/pm-gaojun/.cache/huggingface/datasets/funsd/funsd/1.0.0/f64b3b868c231da8090a3d527c1376c082b0afb5873ddb1e65e9f1005c919106. Subsequent calls will reuse this data.
WARNING:datasets.builder:Reusing dataset funsd (/home/pm-gaojun/.cache/huggingface/datasets/funsd/funsd/1.0.0/f64b3b868c231da8090a3d527c1376c082b0afb5873ddb1e65e9f1005c919106)
WARNING:datasets.builder:Reusing dataset funsd (/home/pm-gaojun/.cache/huggingface/datasets/funsd/funsd/1.0.0/f64b3b868c231da8090a3d527c1376c082b0afb5873ddb1e65e9f1005c919106)
[INFO|configuration_utils.py:652] 2023-02-20 16:16:54,487 >> loading configuration file lilt-roberta-en-base/config.json
Traceback (most recent call last):
  File "examples/run_funsd.py", line 369, in <module>
    main()
  File "examples/run_funsd.py", line 135, in main
    use_auth_token=True if model_args.use_auth_token else None,
  File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/site-packages/transformers/models/auto/configuration_auto.py", line 829, in from_pretrained
    config_class = CONFIG_MAPPING[config_dict["model_type"]]
  File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/site-packages/transformers/models/auto/configuration_auto.py", line 536, in __getitem__
    raise KeyError(key)
KeyError: 'liltrobertalike'
Traceback (most recent call last):
Traceback (most recent call last):
  File "examples/run_funsd.py", line 369, in <module>
  File "examples/run_funsd.py", line 369, in <module>
    main()main()
  File "examples/run_funsd.py", line 135, in main
  File "examples/run_funsd.py", line 135, in main
    use_auth_token=True if model_args.use_auth_token else None,
    use_auth_token=True if model_args.use_auth_token else None,  File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/site-packages/transformers/models/auto/configuration_auto.py", line 829, in from_pretrained
  File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/site-packages/transformers/models/auto/configuration_auto.py", line 829, in from_pretrained
    config_class = CONFIG_MAPPING[config_dict["model_type"]]
    config_class = CONFIG_MAPPING[config_dict["model_type"]]
  File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/site-packages/transformers/models/auto/configuration_auto.py", line 536, in __getitem__
  File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/site-packages/transformers/models/auto/configuration_auto.py", line 536, in __getitem__
    raise KeyError(key)raise KeyError(key)
KeyError: KeyError'liltrobertalike'
: 'liltrobertalike'
WARNING:datasets.builder:Reusing dataset funsd (/home/pm-gaojun/.cache/huggingface/datasets/funsd/funsd/1.0.0/f64b3b868c231da8090a3d527c1376c082b0afb5873ddb1e65e9f1005c919106)
Traceback (most recent call last):
  File "examples/run_funsd.py", line 369, in <module>
    main()
  File "examples/run_funsd.py", line 135, in main
    use_auth_token=True if model_args.use_auth_token else None,
  File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/site-packages/transformers/models/auto/configuration_auto.py", line 829, in from_pretrained
    config_class = CONFIG_MAPPING[config_dict["model_type"]]
  File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/site-packages/transformers/models/auto/configuration_auto.py", line 536, in __getitem__
    raise KeyError(key)
KeyError: 'liltrobertalike'
Killing subprocess 132879
Killing subprocess 132880
Killing subprocess 132881
Killing subprocess 132882
Traceback (most recent call last):
  File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/site-packages/torch/distributed/launch.py", line 340, in <module>
    main()
  File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/site-packages/torch/distributed/launch.py", line 326, in main
    sigkill_handler(signal.SIGTERM, None)  # not coming back
  File "/home/pm-gaojun/anaconda3/envs/lilt/lib/python3.7/site-packages/torch/distributed/launch.py", line 301, in sigkill_handler
    raise subprocess.CalledProcessError(returncode=last_return_code, cmd=cmd)
subprocess.CalledProcessError: Command '['/home/pm-gaojun/anaconda3/envs/lilt/bin/python', '-u', 'examples/run_funsd.py', '--local_rank=3', '--model_name_or_path', 'lilt-roberta-en-base', '--tokenizer_name', 'roberta-base', '--output_dir', 'ser_funsd_lilt-roberta-en-base', '--do_train', '--do_predict', '--max_steps', '2000', '--per_device_train_batch_size', '8', '--warmup_ratio', '0.1', '--fp16']' returned non-zero exit status 1.
```
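The traceback pinpoints the failure: `AutoConfig.from_pretrained` reads the `model_type` field from `lilt-roberta-en-base/config.json` and indexes `CONFIG_MAPPING` with it. In transformers 4.25.x, `CONFIG_MAPPING` is a lazy mapping whose `__getitem__` only consults its internal tables, so a plain `.update()` call on it, as in the patched `__init__.py`, appears to be silently ignored. A hedged reproduction of the failing lookup outside the training script (paths as in the command above):

```python
import json

from transformers import CONFIG_MAPPING

# AutoConfig resolves a checkpoint through the "model_type" in its config.json.
with open("lilt-roberta-en-base/config.json") as f:
    model_type = json.load(f)["model_type"]

print(model_type)                    # 'liltrobertalike' for this checkpoint
print(model_type in CONFIG_MAPPING)  # False here reproduces the KeyError
```

The resolution that worked in this thread was to keep the repo's pinned transformers version and fix the packaging conflict instead (see the last comment).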
Did you ever solve the KeyError problem? I've recently been trying to use this model and ran into the same issue.
`pip install packaging==21.3` works.
Related Issues (20)
- Cannot load model after weight generation HOT 6
- Word or segment position embeddings? HOT 6
- Is LiLT-Large possible? HOT 1
- Pre-training code? HOT 5
- post custom dataset training ser on funsd model, inference issue HOT 1
- Config error in Multi-task Semantic Entity Recognition on XFUND HOT 4
- Export model using distilroberta-base HOT 2
- Possibility to combine lilt-only-base with roberta-large HOT 2
- Usage with BigBird-Roberta-Base HOT 1
- Improve relation extraction HOT 7
- How we can use it for unstructured data HOT 1
- pip install -e . error
- how to train from scratch
- pre-processed data HOT 2
- How to decrease inference time of LiLT?
- LiLT can not make inference with the Half (float16) dtype on CPU
- Pretraining with other ROBERTa model HOT 1
- dataset format of FUNSD/XFUND
- Use LiLT / an alternative model with more than 512 tokens HOT 1