将文字内容转换成 PyG(PyTorch Geometric)图数据格式的代码
"""String-to-graph conversion script.

Parses one English sentence with spaCy, encodes every spaCy token with the
BERT tokenizer, and builds an edge list in ``[[sources], [targets]]`` form
whose node ids are positions in the concatenated BERT id sequence.

Written by Wang Jinlong, 2023/9/4.
"""
import os

import spacy
from spacy import displacy
from transformers import BertTokenizer, TFBertModel

# Send HTTP(S) traffic through a proxy on localhost:7890.
# NOTE(review): presumably needed to download the pretrained tokenizer;
# adjust or remove if no proxy listens on this address.
os.environ["http_proxy"] = "http://127.0.0.1:7890"
os.environ["https_proxy"] = "http://127.0.0.1:7890"

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
text = "Beautiful girl, I love you."
nlp = spacy.load('en_core_web_md')
doc = nlp(text)

bert_tocker = []  # BERT ids of all tokens, concatenated in sentence order
index_list = []   # index_list[i]: position of the first BERT id of spaCy token i
edge = [[], []]   # edge[0] holds source positions, edge[1] target positions

# Positions are counted from 1.
# NOTE(review): looks like slot 0 is left free for a leading [CLS] id -- confirm.
next_position = 1
for token in doc:
    index_list.append(next_position)
    # Encode the token text with BERT, without [CLS]/[SEP] special tokens.
    piece_ids = tokenizer.encode(token.text, add_special_tokens=False)
    # One spaCy token may produce several ids, so advance by the encoded length.
    next_position += len(piece_ids)
    bert_tocker.extend(piece_ids)

# Link every token to each other token in its dependency subtree. The subtree
# contains all descendants, not only direct children, so the result is the
# transitive closure of the dependency tree (self-loops are skipped).
for token in doc:
    for descendant in token.subtree:
        if descendant == token:
            continue
        edge[0].append(index_list[token.i])
        edge[1].append(index_list[descendant.i])

print(edge)
print(bert_tocker)

# Serve an interactive dependency-parse visualisation; this call blocks.
displacy.serve(doc, style="dep")
运行前请先安装 spaCy 库(代码还依赖 transformers 库),并下载所需的 spaCy 语言模型。
安装
1. spacy 安装:
conda install spacy
2. 英文基础模型安装(spaCy v3 起不再支持 `en` 简写,需使用完整的模型名):
python -m spacy download en_core_web_sm
3. en_core_web_md库安装:
python -m spacy download en_core_web_md
运行效果
[[2, 5, 5, 5, 5, 5, 5], [1, 1, 2, 3, 4, 6, 7]]
对应的图
暂无评论
发表评论