I am encountering an issue with Azure AI Search (formerly known as Azure Cognitive Search). My objective is to index my SharePoint content using Azure AI Search so that I can integrate it with Azure OpenAI. In my scenario, I have a site, let's call it "Root," with multiple subsites such as subsite-1, subsite-2, subsite-3, and so on. I want to index all the libraries within the subsites in Azure AI Search. Each subsite's libraries have additional columns that I also want to include in the index. However, the libraries in the "Root" site do not have these additional columns, and I wish to skip indexing them.
Datasource:
{
  "name": "prod-sharepoint-datasource",
  "type": "sharepoint",
  "credentials": {
    "connectionString": "SharePointOnlineEndpoint=https://xxx.sharepoint.com/sites/Root/;ApplicationId=xxx;ApplicationSecret=xxx"
  },
  "container": {
    "name": "useQuery",
    "query": "includeLibrariesInSite=https://xxx.sharepoint.com/sites/Root;additionalColumns=MyCustomColumn,MyCustomColumn2,MyCustomColumn3"
  }
}
I have attempted to exclude libraries from the "Root" site using the excludeLibrary property, but it did not work as expected. Here's an example of what I tried:
{
  "name": "prod-sharepoint-datasource",
  "type": "sharepoint",
  "credentials": {
    "connectionString": "SharePointOnlineEndpoint=https://xxx.sharepoint.com/sites/Root/;ApplicationId=xxx;ApplicationSecret=xxx"
  },
  "container": {
    "name": "useQuery",
    "query": "includeLibrariesInSite=https://xxx.sharepoint.com/sites/Root;additionalColumns=MyCustomColumn,MyCustomColumn2,MyCustomColumn3;excludeLibrary=https://xxx.sharepoint.com/sites/Root/default.aspx;excludeLibrary=https://xxx.sharepoint.com/sites/Root/Library1.aspx;excludeLibrary=https://xxx.sharepoint.com/sites/Root/Library2.aspx;excludeLibrary=https://xxx.sharepoint.com/sites/Root/Library3.aspx"
  }
}
I have also included the JSON for the index and the indexer configurations below. If anyone has insights on how to properly exclude libraries from the "Root" site, or how to exclude the entire "Root" site so that only the libraries from its subsites are indexed, I would greatly appreciate the assistance.
Index:
{
  "name": "prod-sharepoint-indexes",
  "fields": [
    { "name": "column1", "type": "Edm.String", "searchable": true, "filterable": false, "sortable": false, "facetable": false },
    { "name": "column2", "type": "Edm.String", "searchable": true, "filterable": false, "sortable": false, "facetable": false },
    { "name": "column3", "type": "Edm.String", "searchable": true, "filterable": false, "sortable": false, "facetable": false },
    { "name": "content", "type": "Edm.String", "searchable": true, "filterable": false, "sortable": false, "facetable": false },
    { "name": "id", "type": "Edm.String", "key": true, "searchable": false },
    { "name": "metadata_spo_item_name", "type": "Edm.String", "key": false, "searchable": true, "filterable": false, "sortable": false, "facetable": false },
    { "name": "metadata_spo_item_path", "type": "Edm.String", "key": false, "searchable": false, "filterable": false, "sortable": false, "facetable": false },
    { "name": "metadata_spo_item_weburi", "type": "Edm.String", "key": false, "searchable": false, "filterable": false, "sortable": false, "facetable": false },
    { "name": "metadata_spo_item_content_type", "type": "Edm.String", "key": false, "searchable": false, "filterable": true, "sortable": false, "facetable": true },
    { "name": "metadata_spo_item_last_modified", "type": "Edm.DateTimeOffset", "key": false, "searchable": false, "filterable": false, "sortable": true, "facetable": false },
    { "name": "metadata_spo_item_size", "type": "Edm.Int64", "key": false, "searchable": false, "filterable": false, "sortable": false, "facetable": false }
  ]
}
Indexer:
{
  "name": "prod-sharepoint-indexer",
  "dataSourceName": "prod-sharepoint-datasource",
  "targetIndexName": "prod-sharepoint-indexes",
  "parameters": {
    "batchSize": null,
    "maxFailedItems": null,
    "maxFailedItemsPerBatch": null,
    "base64EncodeKeys": null,
    "configuration": {
      "indexedFileNameExtensions": ".pdf, .docx, .msg, .xlsx, .eml, .ppt, .doc",
      "excludedFileNameExtensions": ".png, .jpg",
      "dataToExtract": "contentAndMetadata"
    }
  },
  "schedule": {},
  "fieldMappings": [
    {
      "sourceFieldName": "metadata_spo_site_library_item_id",
      "targetFieldName": "id",
      "mappingFunction": {
        "name": "base64Encode"
      }
    }
  ]
}