There are several answers here. Lets test them all :o)
--drop table if exists t;
--drop function if exists strings_equivalent_erwin(text, text);
--drop function if exists strings_equivalent_stefan(text, text);
--drop function if exists strings_equivalent_evan(text, text);
--drop function if exists strings_equivalent_my(text, text);
create table t(x text);
insert into t select concat((random()*100)::int, ' ',(random()*100)::int, ' ',(random()*100)::int) from generate_series(1,500);
CREATE OR REPLACE FUNCTION strings_equivalent_erwin(a text, b text)
RETURNS bool AS
$func$
SELECT a1 = b1
FROM (
SELECT string_agg(w, ' ') AS a1
FROM (
SELECT w
FROM unnest(string_to_array(a, ' ')) w
ORDER BY w
) a1
) a2
, (
SELECT string_agg(w, ' ') AS b1
FROM (
SELECT w
FROM unnest(string_to_array(b, ' ')) w
ORDER BY w
) b1
) b2
WHERE length(a) = length(b)
UNION ALL
SELECT FALSE
LIMIT 1; -- for clarity, not needed
$func$ LANGUAGE sql IMMUTABLE;
create or replace function strings_equivalent_stefan(a text, b text)
returns boolean as $$
select
case
when length(a) >= length(b) then (
select count( exa1 ) from (
select unnest( string_to_array( a, ' ' ) )
except all
select unnest( string_to_array( b, ' ' ) ) ) exa1
) = 0
when length(a) < length(b) then (
select count( exa2 ) from (
select unnest( string_to_array( b, ' ' ) )
except all
select unnest( string_to_array( a, ' ' ) ) ) exa2
) = 0
else false
end
$$ language sql immutable;
CREATE OR REPLACE FUNCTION strings_equivalent_evan(a text, b text)
RETURNS bool AS
$func$
my (%arg1, %arg2);
$arg1{$_}++ for grep /\w/, split /\s+/, $_[0];
$arg2{$_}++ for grep /\w/, split /\s+/, $_[1];
return 0 if length %arg1 != length %arg2;
foreach ( keys %arg1 ) {
return 0 if $arg1{$_} != $arg2{$_};
}
return 1;
$func$
LANGUAGE plperl
STRICT IMMUTABLE;
create or replace function strings_equivalent_my(a text, b text) returns bool immutable language plpythonu as $$
return sorted(a.split(' ')) == sorted(b.split(' '))
$$;
The most slow is the Stefan's variant. On my aged HW it is:
explain analyse select * from t as t1 join t as t2 on (strings_equivalent_stefan(t1.x, t2.x));
Execution time: 37323.874 ms
The accepted Erwin's case is faster a bit (but not much significant):
explain analyse select * from t as t1 join t as t2 on (strings_equivalent_erwin(t1.x, t2.x));
Execution time: 29939.032 ms
The Evan's variant is faster more then 2 times:
explain analyse select * from t as t1 join t as t2 on (strings_equivalent_evan(t1.x, t2.x));
Execution time: 13452.271 ms
But the winner is my lovely Python :o) (and it is also much more readable)
explain analyse select * from t as t1 join t as t2 on (strings_equivalent_my(t1.x, t2.x));
Execution time: 5818.160 ms
Good luck.
PS: To be honest, there is regex variant with removing whitespaces and case insensitive:
create or replace function strings_equivalent_my(a text, b text) returns bool immutable language plpythonu as $$
import re
return sorted(re.split('\s+', a.strip().lower())) == sorted(re.split('\s+', b.strip().lower()))
$$;
and it is much slower: Execution time: 12127.945 ms but it is still faster then Perl.