我感觉自己很笨- MatBailie正确地指出了我原来的解决方案是不正确的。实际上,我一直认为TRANSLATE('abc', 'abc', 'bcd')应该返回ddd,但在测试SQL Server 2017的TRANSLATE后,我发现'bcd'将是正确的答案。通过查看此帖子的历史记录,您可以看到我原来的(不正确的版本)。以下是使用ngrams8k的更新解决方案:
DECLARE
@string varchar(8000) = 'abc',
@fromChar varchar(100) = 'abc', -- note: no mutation
@toChar varchar(100) = 'bcd';
SELECT newString =
(
SELECT CASE WHEN x>z THEN '' WHEN x>0 THEN s ELSE t END+''
FROM dbo.ngrams8k(@string,1) ng
CROSS APPLY (VALUES (charindex(ng.token,@fromChar),len(@toChar),ng.token)) x(x,z,t)
CROSS APPLY (VALUES (ng.position, substring(@toChar,x.x,1))) xx(p,s)
ORDER BY xx.p
FOR XML PATH(''), TYPE
).value('(text())[1]', 'varchar(8000)');
CREATE FUNCTION [dbo].[F_Translate]
(
@String varchar(8000),
@FromChar varchar(200),
@ToChar varchar(200)
)
RETURNS varchar(8000)
AS
BEGIN
DECLARE @result as varchar(8000) = NULL
DECLARE @currentChar as char(1) = NULL
DECLARE @CurrentIndexFounded as int = 0
DECLARE @CurrentIndexString as int = 0
IF(@FromChar IS NULL OR @ToChar IS NULL)
BEGIN
return cast('Parameters @FromChar and @ToChar must contains 1 caracter minimum' as int);
END
ELSE IF(DATALENGTH(@FromChar) <> DATALENGTH(@ToChar) OR DATALENGTH(@FromChar) = 0)
BEGIN
return cast('Parameters @FromChar and @ToChar must contain the same number of characters (at least 1 character)' as int);
END
IF(@String IS NOT NULL)
BEGIN
SET @result = '';
WHILE(@CurrentIndexString < DATALENGTH(@String))
BEGIN
SET @CurrentIndexString = @CurrentIndexString + 1;
SET @currentChar = SUBSTRING(@String, @CurrentIndexString, 1);
SET @CurrentIndexFounded = CHARINDEX(@currentChar COLLATE Latin1_General_CS_AS, @FromChar COLLATE Latin1_General_CS_AS);
IF(@CurrentIndexFounded > 0)
BEGIN
SET @result = CONCAT(@result, SUBSTRING(@ToChar, @CurrentIndexFounded, 1)) ;
END
ELSE
BEGIN
SET @result = CONCAT(@result, @currentChar);
END
END
END
return @result
END
CREATE TABLE ReplaceValues (FindChar NVARCHAR(100) NOT NULL
,ReplWith NVARCHAR(100) NOT NULL
,SortOrder INT NOT NULL);
INSERT INTO ReplaceValues VALUES('a','x',1) --all "a" will be "x"
,('test','yeah!',2) --"test" will be "yeah"
,('hello','ciao',3) --"hello" will be "ciao"
,('xxx','magic',4); --this is magic (see below)
GO
--您不能内联使用古怪的更新,但可以将其 Package 在标量函数中:
CREATE FUNCTION dbo.MultiReplace(@ReplaceTarget VARCHAR(MAX))
RETURNS VARCHAR(MAX)
AS
BEGIN
--Quirky Update: One of the rare situations where this is a good idea
SELECT @ReplaceTarget=REPLACE(@ReplaceTarget,rv.FindChar,rv.ReplWith)
FROM ReplaceValues AS rv
ORDER BY rv.SortOrder;
RETURN @ReplaceTarget;
END
GO
--测试数据表
declare @t table(TestString varchar(100))
insert into @t values('This string is without repls')
,('This is a test, hello, one more test')
,('See the cascade replace with aaa, which is converted to xxx, then to magic');
--...and the magic is going in here:
SELECT t.TestString
,dbo.MultiReplace(t.TestString) AS Translated
FROM @t AS t
GO
--清理
DROP FUNCTION dbo.MultiReplace;
DROP TABLE ReplaceValues;
结果
This string is without repls
This is x yeah!, ciao, one more yeah!
See the cxscxde replxce with magic, which is converted to magic, then to mxgic
我想我也应该把我的想法放进去。这避免了可怕的WHILE循环,而且,也没有使用自引用变量(这可能会变得很糟糕)。 注意,首先使用了Tally表,然后我使用了表值函数(而不是速度较慢的标量函数)来完成这项工作。 注意,我已经设置好了,如果您在右边提供的参数较少,字符将被删除。因此,如果参数@FindChars的值为'AB',而@ReplaceChars的值为'C',那么'A'将被替换为' C','B'将被替换为''。我注意到,对于TRANSLATE,这将产生错误The second and third arguments of the TRANSLATE built-in function must contain an equal number of characters. 但是,函数的问题在于不能使用THROW或RAISERROR,这意味着在函数内部实际产生错误是不可能的。然而,你可以设置一些东西,以便在两个长度不匹配时返回 NULL,但(不幸的是)错误产生不能在函数内部执行。
CREATE VIEW dbo.Tally WITH SCHEMABINDING
AS
WITH C1 AS (SELECT 1 AS I UNION ALL SELECT 1),
C2 AS (SELECT 1 AS I FROM C1 AS L CROSS JOIN C1 AS R),
C3 AS (SELECT 1 AS I FROM C2 AS L CROSS JOIN C2 AS R),
C4 AS (SELECT 1 AS I FROM C3 AS L CROSS JOIN C3 AS R),
C5 AS (SELECT 1 AS I FROM C4 AS L CROSS JOIN C4 AS R),
C6 AS (SELECT 1 AS I FROM C5 AS L CROSS JOIN C5 AS R),
RN AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N FROM C6)
SELECT TOP (1000000) N
FROM RN
ORDER BY N;
GO
CREATE FUNCTION dbo.OwnTranslate (@String varchar(8000),@FindChars varchar(8000), @ReplaceChars varchar(8000))
RETURNS TABLE
AS RETURN (
WITH ToBeReplaced AS (
SELECT @String AS String,
FC.N,
SUBSTRING(@FindChars, FC.N,1) AS FC,
ISNULL(SUBSTRING(@ReplaceChars, RC.N,1),'') AS RC
FROM (SELECT TOP (LEN(@FindChars)) N FROM Tally) FC
OUTER APPLY (SELECT TOP (LEN(@ReplaceChars)) T.N FROM Tally T WHERE T.N = FC.N AND T.N <= LEN(@ReplaceChars)) RC),
Replacing AS (
SELECT N, REPLACE(String, FC, RC) AS ReplacedString
FROM ToBeReplaced
WHERE N = 1
UNION ALL
SELECT R.N + 1, REPLACE(ReplacedString, TBR.FC, TBR.RC) AS ReplacedString
FROM ToBeReplaced TBR
JOIN Replacing R ON TBR.N = R.N + 1)
SELECT TOP 1 ReplacedString
FROM Replacing
ORDER BY N DESC);
GO
WITH VTE AS (
SELECT *
FROM (VALUES ('This is a string to be Translated.')) V(S))
SELECT VTE.S, OT.ReplacedString
FROM VTE
CROSS APPLY dbo.OwnTranslate (VTE.S, 'Ts ', 'qz_') OT;
GO
--Clean up
DROP FUNCTION dbo.OwnTranslate;
DROP VIEW Tally;
CREATE FUNCTION dbo.MultiReplace(@ReplaceTarget NVARCHAR(MAX), @from_chars NVARCHAR(MAX), @to_chars NVARCHAR(MAX))
RETURNS NVARCHAR(MAX)
AS
BEGIN
--Quirky Update: One of the rare situations where this is a good idea
SELECT @ReplaceTarget=REPLACE(@ReplaceTarget,SUBSTRING(@from_chars, id+1, 1), SUBSTRING(@to_chars, id+1, 1))
FROM numbers
WHERE id < LEN(@from_chars) AND id < LEN(@to_chars)
ORDER BY id;
RETURN @ReplaceTarget;
END
CREATE FUNCTION dbo.Translate(@ReplaceTarget NVARCHAR(MAX), @from_chars NVARCHAR(MAX), @to_chars NVARCHAR(MAX))
RETURNS NVARCHAR(MAX)
AS
BEGIN
DECLARE
@steps INT = LEN('_' + @from_chars + '_') - 2
;
WITH
dictionary(id, string_from, string_interim, string_to) AS
(
SELECT
id, string_from, N'<' + string_from + N'>', string_to
FROM
(
SELECT
id,
ROW_NUMBER() OVER (PARTITION BY string_from ORDER BY id) AS occurence,
string_from,
string_to
FROM
numbers
CROSS APPLY
(
SELECT
CAST(SUBSTRING(@from_chars, numbers.id, 1) AS NVARCHAR(5)) AS string_from,
CAST(SUBSTRING(@to_chars, numbers.id, 1) AS NVARCHAR(5)) AS string_to
)
chars
WHERE
numbers.id > 0
AND numbers.id <= @steps
)
sorted_dictionary
WHERE
occurence = 1
)
,
mapping_sequence(id, string_from, string_to) AS
(
SELECT 1, N'<', N'<<>' WHERE @from_chars LIKE N'%<%'
UNION ALL SELECT 2, N'>', N'<>>' WHERE @from_chars LIKE N'%>%'
UNION ALL SELECT 3, N'<<<>>', N'<<>' WHERE @from_chars LIKE N'%<%' AND @from_chars LIKE N'%>%'
UNION ALL SELECT 3 + id, string_from, string_interim FROM dictionary WHERE string_from NOT IN (N'<', N'>')
UNION ALL SELECT 3 + @steps + id, string_interim, string_to FROM dictionary
)
SELECT
@ReplaceTarget = REPLACE(@ReplaceTarget, string_from, string_to)
FROM
mapping_sequence
ORDER BY
id
;
RETURN @ReplaceTarget;
END
CREATE FUNCTION dbo.Translate(
@ReplaceTarget NVARCHAR(MAX),
@from_chars NVARCHAR(MAX),
@to_chars NVARCHAR(MAX)
)
RETURNS NVARCHAR(MAX)
AS
BEGIN
DECLARE
@steps INT = LEN('_' + @from_chars + '_') - 2
;
WITH
dictionary AS
(
SELECT
id, string_from, string_to
FROM
(
SELECT
ROW_NUMBER() OVER ( ORDER BY string_from ) AS id,
ROW_NUMBER() OVER (PARTITION BY string_from ORDER BY id) AS occurence,
string_from,
string_to
FROM
numbers
CROSS APPLY
(
SELECT
CAST(SUBSTRING(@from_chars, numbers.id, 1) AS NVARCHAR(5)) AS string_from,
CAST(SUBSTRING(@to_chars, numbers.id, 1) AS NVARCHAR(5)) AS string_to
)
chars
WHERE
numbers.id > 0
AND numbers.id <= @steps
)
sorted_dictionary
WHERE
occurence = 1
),
two_stage AS
(
SELECT
map.*
FROM
dictionary dict
CROSS APPLY
(
SELECT COUNT(*) FROM dictionary WHERE dictionary.id > dict.id AND dictionary.string_from = dict.string_to
)
remap(hits)
CROSS APPLY
(
SELECT id, dict.string_from, dict.string_to WHERE remap.hits = 0 AND dict.string_from NOT IN (N'<', N'>')
UNION ALL
SELECT id, dict.string_from, N'<' + dict.string_from + N'>' WHERE remap.hits > 0 AND dict.string_from NOT IN (N'<', N'>')
UNION ALL
SELECT id + @steps, N'<' + dict.string_from + N'>', dict.string_to WHERE remap.hits > 0 AND dict.string_from NOT IN (N'<', N'>')
UNION ALL
SELECT id + @steps * 2, N'<' + dict.string_from + N'>', dict.string_to WHERE dict.string_from IN (N'<', N'>')
)
map
)
,
mapping_sequence(id, string_from, string_to) AS
(
SELECT 1, N'<', N'<<>' WHERE @from_chars LIKE N'%<%'
UNION ALL SELECT 2, N'>', N'<>>' WHERE @from_chars LIKE N'%>%'
UNION ALL SELECT 3, N'<<<>>', N'<<>' WHERE @from_chars LIKE N'%<%' AND @from_chars LIKE N'%>%'
UNION ALL SELECT 3 + id, string_from, string_to FROM two_stage
)
SELECT
@ReplaceTarget = REPLACE(@ReplaceTarget, string_from, string_to)
FROM
mapping_sequence
ORDER BY
id
;
RETURN @ReplaceTarget;
END
CREATE OR ALTER FUNCTION util.__string_ReturnOnlyAlpha (@input NVARCHAR(MAX))
RETURNS TABLE
AS RETURN
(
SELECT --(xml).value , remove entities from
(SELECT [value] FROM (
SELECT TOP(LEN(@input))
n.[i] , SUBSTRING(@input,n.[i],1) [value]
FROM ( SELECT ROW_NUMBER() OVER(ORDER BY (SELECT '.')) [i] FROM
STRING_SPLIT(REPLICATE(CAST('.' AS VARCHAR(MAX)),LEN(@input) - 1),'.')
) n([i])
ORDER BY n.[i]
) ss WHERE ss.[Value] IN (' ') OR ss.[value] LIKE '%[A-Za-z]%'
FOR XML PATH(''),type
).value('.','nvarchar(max)') [TextValue]
)
GO
GO
/* Translate pt 1/3 : */
CREATE OR ALTER FUNCTION util.__string_AsCharacterRows(@input NVARCHAR(MAX))
RETURNS TABLE
AS RETURN
(
SELECT TOP(LEN(@input COLLATE Latin1_General_100_CI_AS_KS_WS_SC))
n.[i] , CAST(SUBSTRING(@input COLLATE Latin1_General_100_CI_AS_KS_WS_SC,n.[i],1) AS NVARCHAR(2)) [value]
FROM ( SELECT ROW_NUMBER() OVER(ORDER BY (SELECT '.')) [i] FROM
STRING_SPLIT(REPLICATE(CAST('.' AS VARCHAR(MAX)),LEN(@input) - 1),'.')
) n([i])
ORDER BY n.[i]
)
GO
GO
/* Translate pt 2/3 : */
CREATE OR ALTER FUNCTION util.__string__TranslateMap(@inputString NVARCHAR(MAX),@characters NVARCHAR(MAX),@translations NVARCHAR(MAX))
RETURNS TABLE
AS
RETURN
(
SELECT TOP(LEN(@inputString COLLATE Latin1_General_100_CI_AS_KS_WS_SC))
s.[i],
ISNULL(ct.t,s.[value]) [value]
FROM
util.__string_AsCharacterRows(@inputString) s
LEFT HASH JOIN (SELECT
c.i,
c.[value] c,
t.[value] t
FROM util.__string_AsCharacterRows(@characters) c
INNER MERGE JOIN util.__string_AsCharacterRows(@translations) t ON t.i = c.i
) ct ON ct.c = s.[value]
ORDER BY
s.[i]
)
GO
GO
/* Translate pt 3/3 : */
CREATE OR ALTER FUNCTION util.__string__Translate(@inputString NVARCHAR(MAX),@characters NVARCHAR(MAX),@translations NVARCHAR(MAX))
RETURNS TABLE
AS
RETURN -- if you have a CLR method for string_agg replacement, use that here.
( SELECT
(SELECT [value] FROM
util.__string__TranslateMap(@inputString,@characters,@translations)
FOR XML PATH(''),type
).value('.','nvarchar(max)') [TextValue]
--but if you have CLR and can add new methods, maybe use a CLR translate function
)
GO
GO
/*example*/
SELECT
input.string, -- in: '😃' & 👍.
translated.TextValue -- out: "😎" & ✌!
FROM
(VALUES(N'''😃'' & 👍. ')
,(REPLICATE(N'''😃'' & 👍. ',9000))) input(string) --can handle characters over 9000!
CROSS APPLY util.__string__Translate(input.string,N'''😃👍.',N'"😎✌!') translated
6条答案
按热度按时间cigdeys31#
编辑日期:
我感觉自己很笨- MatBailie正确地指出了我原来的解决方案是不正确的。实际上,我一直认为
TRANSLATE('abc', 'abc', 'bcd')
应该返回ddd,但在测试SQL Server 2017的TRANSLATE后,我发现'bcd'将是正确的答案。通过查看此帖子的历史记录,您可以看到我原来的(不正确的版本)。以下是使用ngrams8k的更新解决方案:返回值〉bcd
x759pob22#
我建议我翻译功能:
gkn4icbw3#
比
WHILE
循环更好的是--至少在我看来是这样-a用一个函数 Package 的古怪的更新:您可以在表中维护替换值。您可以添加一些分组键(例如,用于语言选择或主题焦点),并将其作为附加参数传递给函数:
--您不能内联使用古怪的更新,但可以将其 Package 在标量函数中:
--测试数据表
--清理
结果
9njqaruj4#
我想我也应该把我的想法放进去。这避免了可怕的
WHILE
循环,而且,也没有使用自引用变量(这可能会变得很糟糕)。注意,首先使用了Tally表,然后我使用了表值函数(而不是速度较慢的标量函数)来完成这项工作。
注意,我已经设置好了,如果您在右边提供的参数较少,字符将被删除。因此,如果参数
@FindChars
的值为'AB'
,而@ReplaceChars
的值为'C'
,那么'A'
将被替换为'C'
,'B'
将被替换为''
。我注意到,对于TRANSLATE
,这将产生错误The second and third arguments of the TRANSLATE built-in function must contain an equal number of characters.
但是,函数的问题在于不能使用
THROW
或RAISERROR
,这意味着在函数内部实际产生错误是不可能的。然而,你可以设置一些东西,以便在两个长度不匹配时返回NULL
,但(不幸的是)错误产生不能在函数内部执行。有什么问题,请尽管问。
yjghlzjz5#
改编自@Shnugo的答案。这更接近你想要的。你只需要确保你有一个
dbo.numbers
表 (它们真的很有用)。http://dbfiddle.uk/?rdbms=sqlserver_2016&fiddle=627828307504174dcf3f61313ba384a8
和一个稍微过顶的方式来满足你的要求,
TRANSLATE('abc', 'abc', 'bcd') => 'bcd')
。http://dbfiddle.uk/?rdbms=sqlserver_2017&fiddle=9dbe7214ac4b5bb00060686cfaa879c2
对上述内容进行可能的小优化(尽可能减少REPLACE调用的数量)...
http://dbfiddle.uk/?rdbms=sqlserver_2017&fiddle=8af6ae050dc8d425521ae911b70a7968
或者......
http://dbfiddle.uk/?rdbms=sqlserver_2017&fiddle=1451aa88780463b1e7cfe15dd0071194
或者......
http://dbfiddle.uk/?rdbms=sqlserver_2017&fiddle=3079d4dd4289e8696072f6ee37be76ae
u2nhd7ah6#
在SQL 2016中,需要删除/替换数值和字符(如单引号和双引号)时,我有一个实用函数,它将translate()作为一个没有CTE递归的iTVF。在开发过程中,我发现了代理对--虽然在我的用例中不太可能,但在以下示例中得到了解释。
这使用了3个iTVF,它们是 Package 器,并重用了前面的一个,但如果需要,您可能可以将其作为子查询写出。内存使用未优化,如果需要,您可能需要等待2017或CLR。