/****************************************************** * * Name: create-grimm-fairy-tales-schema.sql * * Design Phase: * Author: John Miner * Date: 06-12-2012 * Purpose: A program to create the brothers * grimm fairy tale database. This * sample database will be used to * demonstrate full text indexing. * ******************************************************/ /* Create a database to hold the fairy tales */ /* -- Delete existing database IF EXISTS (SELECT name FROM sys.databases WHERE name = N'GRIMM') DROP DATABASE GRIMM GO */ -- Create new database CREATE DATABASE GRIMM ( MAXSIZE = 1GB, EDITION = 'STANDARD', SERVICE_OBJECTIVE = 'S1' ) GO /* Create a schema to hold objects */ -- Delete existing schema. DROP SCHEMA IF EXISTS [STORIES] GO -- Add new schema. CREATE SCHEMA [STORIES] AUTHORIZATION [dbo] GO /* Create a table to hold the fairy tales */ -- Delete existing table DROP TABLE IF EXISTS [STORIES].[TBL_FAIRY_TALES] GO -- Add new table CREATE TABLE [STORIES].[TBL_FAIRY_TALES] ( [MY_ID] [SMALLINT] IDENTITY(1, 1) NOT NULL, [MY_TITLE] [VARCHAR] (75) NOT NULL, [MY_TALE] [VARCHAR](MAX) NULL ) GO /* Insert just the titles (derived table) */ -- Add 62 titles INSERT INTO [STORIES].[TBL_FAIRY_TALES] (MY_TITLE) SELECT T.MY_TITLE FROM ( VALUES ('THE GOLDEN BIRD'), ('HANS IN LUCK'), ('JORINDA AND JORINDEL'), ('THE TRAVELLING MUSICIANS'), ('OLD SULTAN'), ('THE STRAW, THE COAL, AND THE BEAN'), ('BRIAR ROSE'), ('THE DOG AND THE SPARROW'), ('THE TWELVE DANCING PRINCESSES'), ('THE FISHERMAN AND HIS WIFE'), ('THE WILLOW-WREN AND THE BEAR'), ('THE FROG-PRINCE'), ('CAT AND MOUSE IN PARTNERSHIP'), ('THE GOOSE-GIRL'), ('THE ADVENTURES OF CHANTICLEER AND PARTLET'), ('RAPUNZEL'), ('FUNDEVOGEL'), ('THE VALIANT LITTLE TAILOR'), ('HANSEL AND GRETEL'), ('THE MOUSE, THE BIRD, AND THE SAUSAGE'), ('MOTHER HOLLE'), ('LITTLE RED-CAP [LITTLE RED RIDING HOOD]'), ('THE ROBBER BRIDEGROOM'), ('TOM THUMB'), ('RUMPELSTILTSKIN'), ('CLEVER GRETEL'), ('THE OLD MAN AND HIS GRANDSON'), ('THE LITTLE PEASANT'), ('FREDERICK AND CATHERINE'), ('SWEETHEART ROLAND'), ('SNOWDROP'), ('THE PINK'), ('CLEVER ELSIE'), ('THE MISER IN THE BUSH'), ('ASHPUTTEL'), ('THE WHITE SNAKE'), ('THE WOLF AND THE SEVEN LITTLE KIDS'), ('THE QUEEN BEE'), ('THE ELVES AND THE SHOEMAKER'), ('THE JUNIPER-TREE'), ('THE TURNIP'), ('CLEVER HANS'), ('THE THREE LANGUAGES'), ('THE FOX AND THE CAT'), ('THE FOUR CLEVER BROTHERS'), ('LILY AND THE LION'), ('THE FOX AND THE HORSE'), ('THE BLUE LIGHT'), ('THE RAVEN'), ('THE GOLDEN GOOSE'), ('THE WATER OF LIFE'), ('THE TWELVE HUNTSMEN'), ('THE KING OF THE GOLDEN MOUNTAIN'), ('DOCTOR KNOWALL'), ('THE SEVEN RAVENS'), ('THE WEDDING OF MRS FOX'), ('THE SALAD'), ('THE STORY OF THE YOUTH WHO WENT FORTH TO LEARN WHAT FEAR WAS'), ('KING GRISLY-BEARD'), ('IRON HANS'), ('CAT-SKIN'), ('SNOW-WHITE AND ROSE-RED') ) AS T (MY_TITLE); GO -- Show table w/o tales SELECT * FROM [GRIMM].[STORIES].[TBL_FAIRY_TALES]; GO /* Create master key */ -- Drop master key IF EXISTS (SELECT * FROM sys.symmetric_keys WHERE name = '##MS_DatabaseMasterKey##') DROP MASTER KEY; -- Create master key CREATE MASTER KEY ENCRYPTION BY PASSWORD = 'Qkmof0SV3yxReKEP'; GO /* Create database credential */ -- Drop db credential IF EXISTS(SELECT * FROM sys.database_credentials WHERE name = 'CRD_AZURE_4_TALES') DROP DATABASE SCOPED CREDENTIAL CRD_AZURE_4_TALES; GO -- Create db credential CREATE DATABASE SCOPED CREDENTIAL CRD_AZURE_4_TALES WITH IDENTITY = 'SHARED ACCESS SIGNATURE', SECRET = 'sv=2017-04-17&ss=b&srt=sco&sp=rwdlac&se=2018-08-01T21:32:58Z&st=2017-08-01T13:32:58Z&spr=https,http&sig=g6PVlNZuDjzyPAP6xHs9h%2Bv9LPRfrTy0cuSMGwpsQtA%3D'; GO /* Create external data src */ -- Drop external data src IF EXISTS (SELECT * FROM sys.external_data_sources WHERE NAME = 'EDS_AZURE_4_TALES') DROP EXTERNAL DATA SOURCE EDS_AZURE_4_TALES; GO -- Create external data src CREATE EXTERNAL DATA SOURCE EDS_AZURE_4_TALES WITH ( TYPE = BLOB_STORAGE, LOCATION = 'https://sa4tips17.blob.core.windows.net/sc4tips17', CREDENTIAL = CRD_AZURE_4_TALES ); GO /* Show new database objects */ -- Display master key, database credential & external data source SELECT * FROM sys.symmetric_keys WHERE name = '##MS_DatabaseMasterKey##' SELECT * FROM sys.database_credentials WHERE name = 'CRD_AZURE_4_TALES' SELECT * FROM sys.external_data_sources WHERE NAME = 'EDS_AZURE_4_TALES' GO /* Read in one file as an example */ SELECT * FROM OPENROWSET ( BULK 'TALES/GRIMM-01.TXT', DATA_SOURCE = 'EDS_AZURE_4_TALES', SINGLE_CLOB ) AS RAW_DATA; GO /* Update tales with text file (blob) */ -- delete work space DROP TABLE IF EXISTS #TMP_TALE GO -- do not count rows SET NOCOUNT ON; -- local variables DECLARE @VAR_CNT SMALLINT; DECLARE @VAR_MAX SMALLINT; DECLARE @VAR_STMT VARCHAR(2048); -- setup counters SELECT @VAR_CNT = 1; SELECT @VAR_MAX = 62; -- create work space CREATE TABLE #TMP_TALE (MY_TALE VARCHAR(MAX)); -- load the tales WHILE (@VAR_CNT <= @VAR_MAX) BEGIN -- clear work space DELETE FROM #TMP_TALE; -- make dynamic sql stmt SELECT @VAR_STMT = ' INSERT INTO #TMP_TALE SELECT BulkColumn FROM OPENROWSET ( BULK ''TALES/GRIMM-' + REPLACE(STR(@VAR_CNT, 2, 0), ' ', '0') + '.TXT'', DATA_SOURCE = ''EDS_AZURE_4_TALES'', SINGLE_CLOB ) AS RAW_DATA;' -- load file into work space PRINT @VAR_STMT; EXEC (@VAR_STMT); -- update fairy tale table UPDATE [GRIMM].[STORIES].[TBL_FAIRY_TALES] SET MY_TALE = (SELECT MY_TALE FROM #TMP_TALE) WHERE MY_ID = @VAR_CNT; -- increment counter SELECT @VAR_CNT = @VAR_CNT + 1; END; GO -- count rows SET NOCOUNT OFF; -- delete work space DROP TABLE IF EXISTS #TMP_TALE GO -- Show table with tales SELECT * FROM [GRIMM].[STORIES].[TBL_FAIRY_TALES]; GO /* Looking for wolf's */ -- # full table or clustered index scan # -- Show time & i/o SET STATISTICS TIME ON SET STATISTICS IO ON GO -- Select everything with word 'wolf' SELECT * FROM [STORIES].[TBL_FAIRY_TALES] T WHERE T.MY_TALE LIKE '%wolf%' GO -- Select everything with word 'wolf' SELECT * FROM [STORIES].[TBL_FAIRY_TALES] T WHERE PATINDEX('%wolf%', T.MY_TALE) > 0 GO -- Hide time & i/o SET STATISTICS TIME OFF SET STATISTICS IO OFF GO /* Add primary key and re-test previous queries */ -- Delete existing primary key IF EXISTS (SELECT * FROM sys.indexes WHERE object_id = OBJECT_ID(N'[STORIES].[TBL_FAIRY_TALES]') AND name = N'PK_FAIRY_TALES_ID') ALTER TABLE [STORIES].[TBL_FAIRY_TALES] DROP CONSTRAINT [PK_FAIRY_TALES_ID] GO -- Add index on id (primary key - clustered) ALTER TABLE [STORIES].[TBL_FAIRY_TALES] ADD CONSTRAINT PK_FAIRY_TALES_ID PRIMARY KEY(MY_ID) GO /* Indexes have a limit of 900 bytes or less (fails) */ -- Try to add index to text data type? CREATE NONCLUSTERED INDEX IDX_FAIRY_TALES_TXT ON [STORIES].[TBL_FAIRY_TALES] (MY_TALE) GO -- Remove primary key before running /* Create a full text index */ -- delete existing catalog IF EXISTS (SELECT * FROM sys.fulltext_catalogs WHERE name = N'FT_GRIMM_CATALOG') DROP FULLTEXT CATALOG FT_GRIMM_CATALOG GO -- create new catalog CREATE FULLTEXT CATALOG FT_GRIMM_CATALOG AS DEFAULT; GO -- delete existing full text index IF EXISTS (SELECT * FROM sys.fulltext_indexes WHERE object_id = object_id('[STORIES].[TBL_FAIRY_TALES]') ) DROP FULLTEXT INDEX ON [STORIES].[TBL_FAIRY_TALES] GO -- Create full text index CREATE FULLTEXT INDEX ON [STORIES].[TBL_FAIRY_TALES] ([MY_TALE]) KEY INDEX PK_FAIRY_TALES_ID ON FT_GRIMM_CATALOG; GO -- Enable the index ALTER FULLTEXT INDEX ON [STORIES].[TBL_FAIRY_TALES] ENABLE; GO -- Start the population ALTER FULLTEXT INDEX ON [STORIES].[TBL_FAIRY_TALES] START FULL POPULATION; GO /* Show new database objects */ -- table indexes SELECT * FROM sys.indexes i JOIN sys.objects o ON i.object_id = o.object_id WHERE is_ms_shipped = 0; -- ft catalog SELECT * FROM sys.fulltext_catalogs; -- full text indexes SELECT * FROM sys.fulltext_indexes; GO -- # clustered index seek / full text match # -- Exact match word 'wolf' - (7) results SELECT MY_ID, MY_TITLE FROM [STORIES].[TBL_FAIRY_TALES] T WHERE CONTAINS(T.MY_TALE, 'wolf') GO -- Exact match word 'wolf' - (7) results SELECT MY_ID, MY_TITLE FROM [STORIES].[TBL_FAIRY_TALES] T WHERE FREETEXT(T.MY_TALE, 'wolf') GO -- Exact match word 'wolf' as a word or prefix & ranking value - (7) results SELECT MY_ID, MY_TITLE, [RANK] FROM [STORIES].[TBL_FAIRY_TALES] AS T JOIN CONTAINSTABLE(STORIES.TBL_FAIRY_TALES, MY_TALE, ' "wolf*" ') FTS ON T.MY_ID = FTS.[KEY] ORDER BY [RANK] DESC GO -- Fuzzy match on a phrase - (7) results SELECT MY_ID, MY_TITLE, [RANK] FROM [STORIES].[TBL_FAIRY_TALES] AS T JOIN FREETEXTTABLE(STORIES.TBL_FAIRY_TALES, MY_TALE, 'wolf') FTS ON T.MY_ID = FTS.[KEY] ORDER BY [RANK] DESC GO -- Fuzzy match on a phrase - (33) results SELECT MY_ID, MY_TITLE, [RANK] FROM [STORIES].[TBL_FAIRY_TALES] AS T JOIN FREETEXTTABLE(STORIES.TBL_FAIRY_TALES, MY_TALE, 'big bad wolf') FTS ON T.MY_ID = FTS.[KEY] ORDER BY [RANK] DESC GO /* Where FTI can go wrong */ -- Looking for words with herd such as [shepherd] - (7) results SELECT * FROM [STORIES].[TBL_FAIRY_TALES] T WHERE T.MY_TALE LIKE '%herd%' GO -- Exact match word 'herd' - (1) results SELECT MY_ID, MY_TITLE FROM [STORIES].[TBL_FAIRY_TALES] T WHERE CONTAINS(T.MY_TALE, '"*herd"') GO -- Exact match word 'herd' - (1) results SELECT MY_ID, MY_TITLE FROM [STORIES].[TBL_FAIRY_TALES] T WHERE FREETEXT(T.MY_TALE, '"*herd"') GO /* Find terms inside the dcoument */ -- Where are the keywords? SELECT distinct keyword, display_term, column_id, document_id, convert(varchar(max), ( select position as [text()], ',' as [text()] from sys.dm_fts_index_keywords_position_by_document (DB_ID('GRIMM'), OBJECT_ID('[STORIES].[TBL_FAIRY_TALES]')) I where I.display_term = O.display_term and I.column_id = O.column_id and I.document_id = O.document_id for xml path('') ) ) as positions FROM sys.dm_fts_index_keywords_position_by_document (DB_ID('GRIMM'), OBJECT_ID('[STORIES].[TBL_FAIRY_TALES]')) O WHERE display_term like 'herd' or display_term like 'shepherd' GO -- Set to manual tracking ALTER FULLTEXT INDEX ON [STORIES].[TBL_FAIRY_TALES] SET CHANGE_TRACKING AUTO; GO -- Exact match word 'shepherd' - (6) results SELECT MY_ID, MY_TITLE, [RANK] FROM [STORIES].[TBL_FAIRY_TALES] AS T JOIN CONTAINSTABLE(STORIES.TBL_FAIRY_TALES, MY_TALE, ' "shepherd" ') FTS ON T.MY_ID = FTS.[KEY] ORDER BY [RANK] DESC GO -- delete the story UPDATE [STORIES].[TBL_FAIRY_TALES] SET my_TALE = NULL WHERE my_id = 28 GO -- show the update SELECT * FROM [STORIES].[TBL_FAIRY_TALES] WHERE my_id = 28; GO /* Reducing noise in the text documents */ -- delete existing stop list IF EXISTS (SELECT * FROM sys.fulltext_stoplists WHERE name = 'FT_STOP_LIST' ) DROP FULLTEXT STOPLIST FT_STOP_LIST; GO -- create new stop list CREATE FULLTEXT STOPLIST FT_STOP_LIST FROM SYSTEM STOPLIST; GO -- show noisy words SELECT * FROM sys.fulltext_stopwords WHERE [language] = 'English' -- add stop word to list ALTER FULLTEXT STOPLIST FT_STOP_LIST ADD 'shepherd' LANGUAGE 'English'; GO -- use new stop list ALTER FULLTEXT INDEX ON [STORIES].[TBL_FAIRY_TALES] SET STOPLIST = FT_STOP_LIST; -- Exact match word 'shepherd' - (0) results SELECT MY_ID, MY_TITLE, [RANK] FROM [STORIES].[TBL_FAIRY_TALES] AS T JOIN CONTAINSTABLE(STORIES.TBL_FAIRY_TALES, MY_TALE, ' "shepherd" ') FTS ON T.MY_ID = FTS.[KEY] ORDER BY [RANK] DESC GO -- use new stop list ALTER FULLTEXT INDEX ON [STORIES].[TBL_FAIRY_TALES] SET STOPLIST = SYSTEM; -- Various filters EXEC sp_help_fulltext_system_components 'filter'