#load needed packages
library(xml2)
library(rvest)
library(lexRankr)
#url to scrape
monsanto_url = "https://www.theguardian.com/environment/2017/sep/28/monsanto-banned-from-european-parliament"
#read page html
page = xml2::read_html(monsanto_url)
#extract text from page html using selector
page_text = rvest::html_text(rvest::html_nodes(page, ".js-article__body p"))
#perform lexrank for top 3 sentences
top_3 = lexRankr::lexRank(page_text,
                          #only 1 article; repeat same docid for all of input vector
                          docId = rep(1, length(page_text)),
                          #return 3 sentences to mimic /u/autotldr's output
                          n = 3,
                          continuous = TRUE)
#reorder the top 3 sentences to be in order of appearance in article
order_of_appearance = order(as.integer(gsub("_","",top_3$sentenceId)))
#extract sentences in order of appearance
ordered_top_3 = top_3[order_of_appearance, "sentence"]
ordered_top_3
[1] "Monsanto lobbyists have been banned from entering the European parliament after the multinational refused to attend a parliamentary hearing into allegations of regulatory interference."
[2] "Monsanto officials will now be unable to meet MEPs, attend committee meetings or use digital resources on parliament premises in Brussels or Strasbourg."
[3] "A Monsanto letter to MEPs seen by the Guardian said that the European parliament was not “an appropriate forum” for discussion on the issues involved."
#load reticulate to call Python from R
library(reticulate)
#create a conda environment with transformers and sentencepiece (plus torch via pip) if it does not exist yet
conda_Env <- conda_list()
if(any(conda_Env[, 1] == "summary") == FALSE)
{
  reticulate::conda_create(envname = "summary", packages = c("transformers", "SentencePiece"), python_version = "3.9.16")
  reticulate::conda_install(envname = "summary", packages = "torch", pip = TRUE)
}
reticulate::use_condaenv(condaenv = "summary")
#import the transformers module and load the pretrained Pegasus tokenizer and model
transformers <- import(module = "transformers")
tokenizer <- transformers$AutoTokenizer$from_pretrained("google/pegasus-xsum")
model <- transformers$PegasusForConditionalGeneration$from_pretrained("google/pegasus-xsum")
#summarize a character string with the Pegasus model
summarize <- function(text)
{
  #tokenize the input and return PyTorch tensors
  inputs <- tokenizer(text, return_tensors = "pt")
  #generate the summary token ids
  output_sequences <- model$generate(input_ids = inputs$input_ids)
  #decode the generated ids back to text
  summarized_text <- tokenizer$batch_decode(output_sequences)
  return(summarized_text)
}
text <- "Monsanto lobbyists have been banned from entering the European parliament after the multinational refused to attend a\n
parliamentary hearing into allegations of regulatory interference.\n
It is the first time MEPs have used new rules to withdraw parliamentary access for firms that ignore\n
a summons to attend parliamentary inquiries or hearings.\n
Monsanto officials will now be unable to meet MEPs, attend committee meetings or use digital resources on parliament premises in Brussels or Strasbourg.\n
While a formal process still needs to be worked through, a spokesman for the parliament’s president Antonio Tajani said that\n
the leaders of all major parliamentary blocks had backed the ban in a vote this morning."
summarize(text)
[1] "<pad> MEPs have taken the first step in blocking access to the European Parliament for lobbying firms.</s>"
#load the chatgpt package
library(chatgpt)
#prompt asking for a one-sentence summary of the article text
question <- "Can you summarize the following text in one sentence : \n Monsanto lobbyists have been banned from entering the European parliament after the multinational refused to attend a\n
parliamentary hearing into allegations of regulatory interference.\n
It is the first time MEPs have used new rules to withdraw parliamentary access for firms that ignore\n
a summons to attend parliamentary inquiries or hearings.\n
Monsanto officials will now be unable to meet MEPs, attend committee meetings or use digital resources on parliament premises in Brussels or Strasbourg.\n
While a formal process still needs to be worked through, a spokesman for the parliament’s president Antonio Tajani said that\n
the leaders of all major parliamentary blocks had backed the ban in a vote this morning."
#set your OpenAI API key (replace "xxx"), start a fresh chat session, and ask for the summary
Sys.setenv(OPENAI_API_KEY = "xxx")
chatgpt::reset_chat_session()
ask_chatgpt(question)
[1] "Monsanto lobbyists have been banned from the European parliament for refusing to attend a hearing on allegations of regulatory interference, marking the first time new rules have been used to withdraw parliamentary access from firms that ignore summons to attend inquiries or hearings."
4 Answers
Answer 1 (zsbz8rwp)
You can try this (from the LSAfun package):
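A minimal sketch of that call, assuming the genericSummary(text, k, ...) signature from the documentation linked below, with 'D' holding a document as a single character string:
#load the package
library(LSAfun)
#'D' is the text document to summarize; 'k' is the number of sentences to keep
D <- "Monsanto lobbyists have been banned from entering the European parliament after the multinational refused to attend a parliamentary hearing into allegations of regulatory interference. Monsanto officials will now be unable to meet MEPs, attend committee meetings or use digital resources on parliament premises in Brussels or Strasbourg."
genericSummary(D, k = 1)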
where 'D' specifies your text document and 'k' specifies the number of sentences to use in the summary (further options are described in the package documentation).
More information: http://search.r-project.org/library/LSAfun/html/genericSummary.html
Answer 2 (ezykj2lf)
There is a package called lexRankr that summarizes text the same way Reddit's /u/autotldr bot summarizes articles. This article has a full walkthrough of how to use it, but as a quick example you can test yourself in R, see the lexRankr code at the top of this page.
Answer 3 (nmpmafwu)
Here is an approach based on the Pegasus Transformer model; see the reticulate/transformers code above.
Answer 4 (gpfsuwkq)
Here is another approach you could consider, based on chatGPT; see the chatgpt code above.