This script file will create an XML dictionary file that can be uploaded with the Add-FastSearchResource cmdlet.

This script takes all of the unique values of a SharePoint list column and writes them to an XML file that can be imported as a dictionary file for an entity extractor.  It can either use the spelling exactly as it is stored in the SharePoint list, or additionally create capitalization variations of each word so that any variation can be picked up during the crawling of a document.  For instance, if the name "Contoso Consulting" was stored in the SharePoint list, the following variants would be created: "contoso consulting", "Contoso consulting", "contoso Consulting" and "Contoso Consulting".

Note: this script only varies the capitalization of the first letter of each word; it does not generate variants for every character.

To Use

Specify the URL of the SharePoint site that contains the list, the list name, the column name, whether to add word variants, and the output file name when running the script; the script passes these values on to the createDictionaryFromSPList function.

PowerShell
Edit|Remove
# Script parameters:
#   siteUrl         - URL of the SharePoint web that hosts the list
#   listName        - name of the list to read
#   columnName      - name of the column whose values become dictionary entries
#   addWordVariants - when true, capitalization variants are added for each value
#   output          - path of the XML file to write
param(
    [string]$siteUrl,
    [string]$listName,
    [string]$columnName,
    [bool]$addWordVariants,
    [string]$output
)

# Load the SharePoint cmdlets; any error raised while adding the snap-in is silenced.
Add-PSSnapin Microsoft.SharePoint.PowerShell -ErrorAction SilentlyContinue

# Builds the full dictionary XML document as a single string.
#   url, listName, fieldName, addWordVariants - forwarded unchanged to createListEntries
# Returns the XML declaration, followed by the <dictionary> element wrapping
# whatever createListEntries produced.
function createDictionaryFromSPList ($url, $listName, $fieldName, $addWordVariants)
{
    $xmlDeclaration = "<?xml version=""1.0"" encoding=""UTF-8"" ?>`n"
    $entryLines = createListEntries $url $listName $fieldName $addWordVariants

    return $xmlDeclaration + "<dictionary>`n" + $entryLines + "</dictionary>`n"
}

# Produces every first-letter capitalization variant of a space-separated phrase.
#   phrase - the text to vary; it is lower-cased and split on single spaces.
# Returns one string per combination of "first letter upper / all lower" across
# the words, i.e. 2^(number of words) strings, ordered by the binary counter
# whose bit N controls word N (bit 0 = first word).
# NOTE: the number of variants doubles with every additional word, so long
# phrases become very expensive.
function createWordVariations($phrase)
{
    $lowerWords = $phrase.ToLower().Split(" ")
    $wordCount = $lowerWords.Length

    # Each word is either capitalized or not, giving 2^wordCount combinations.
    $variationCount = [System.Math]::Pow(2, $wordCount)

    $phraseVariations = @()

    for($i = 0; $i -lt $variationCount; $i++)
    {
        # Start every variant from the all-lowercase words so that no state
        # carries over from the previous combination.
        $variant = $lowerWords.Clone()
        $mask = 1

        for($x = 0; $x -lt $wordCount; $x++)
        {
            # Bit $x of $i set => capitalize word $x. Empty words (produced by
            # consecutive spaces) are left alone; one-letter words are
            # capitalized too, which the previous Length checks skipped.
            if((($i -band $mask) -ne 0) -and ($variant[$x].Length -gt 0))
            {
                $variant[$x] = $variant[$x].Substring(0, 1).ToUpper() + $variant[$x].Substring(1)
            }

            $mask *= 2
        }

        $phraseVariations += [System.String]::Join(" ", $variant)
    }

    return $phraseVariations
}



# Adds the non-empty values of one list column to the lookup table.
#   list            - the SharePoint list whose Items are enumerated
#   fieldName       - the field to read from each item
#   addWordVariants - when true, the lower-case form and every result of
#                     createWordVariations are also added as keys
#   entries         - hashtable (key = text to match, value = original value), updated in place
function addFieldValueEntries($list, $fieldName, $addWordVariants, $entries)
{
    foreach ($listItem in $list.Items)
    {
        $rawValue = $listItem[$fieldName]

        if(-not $rawValue)
        {
            continue
        }

        # Work on the string form so that non-string field values do not
        # break ToLower() and always end up as text keys.
        $fieldValue = [string]$rawValue
        $entries[$fieldValue] = $fieldValue

        if($addWordVariants)
        {
            $entries[$fieldValue.ToLower()] = $fieldValue

            foreach($fieldValueVariation in (createWordVariations $fieldValue))
            {
                $entries[$fieldValueVariation] = $fieldValue
            }
        }
    }
}

# Reads the unique values of a list column and renders them as <entry> lines.
#   url             - URL passed to get-spweb
#   listName        - a single list name, or an array of list names
#   fieldName       - the field name; when listName is an array this is indexed
#                     by list name to obtain the field for each list
#   addWordVariants - when true, capitalization variants are added as extra keys
# Returns a string with one tab-indented <entry key="..." value="..."/> line
# per unique key, sorted by key; an empty string when nothing was found.
function createListEntries ($url, $listName, $fieldName, $addWordVariants)
{
    # Get list from specified Sharepoint web
    $SPWeb = get-spweb $url

    if($SPWeb -eq $null)
    {
        Write-Warning "Could not open SharePoint web '$url'; no entries were created."
        return ""
    }

    try
    {
        # new-object Hashtable (unlike @{}) compares keys case-sensitively,
        # which keeps the capitalization variants apart.
        $uniqueKeyValue = new-object System.Collections.Hashtable

        if($listName -is [array])
        {
            foreach($name in $listName)
            {
                $reflist = $SPWeb.Lists.TryGetList($name)

                if($reflist -eq $null)
                {
                    Write-Warning "List '$name' was not found in '$url'; skipping it."
                    continue
                }

                $fName = $fieldName[$name]
                addFieldValueEntries $reflist $fName $addWordVariants $uniqueKeyValue
            }
        }
        else
        {
            $reflist = $SPWeb.Lists.TryGetList($listName)

            if($reflist -eq $null)
            {
                Write-Warning "List '$listName' was not found in '$url'."
            }
            else
            {
                addFieldValueEntries $reflist $fieldName $addWordVariants $uniqueKeyValue
            }
        }

        # StringBuilder avoids re-copying the whole result for every entry.
        $result = New-Object System.Text.StringBuilder

        foreach ($key in ($uniqueKeyValue.Keys | Sort-Object))
        {
            $encodedKey = encode-Xml $key
            $encodedValue = encode-Xml $uniqueKeyValue[$key]
            [void]$result.Append(("`t<entry key=""{0}"" value=""{1}""/>`n" -f $encodedKey, $encodedValue))
        }

        return $result.ToString()
    }
    finally
    {
        # Release the SPWeb even when reading a list throws.
        $SPWeb.Dispose()
    }
}

# Escapes text for use inside a double-quoted XML attribute value.
#   text - the raw text
# Returns the text with:
#   - '&' replaced by &amp; unless it already starts an entity such as &amp; or &#39;
#   - straight and curly double quotes replaced by &quot;
#   - '>' replaced by &gt; unless it closes a '-->' sequence
#   - '<' replaced by &lt; unless it opens a '<!--' sequence
function encode-Xml( [string] $text )
{
    $text = $text -replace "&(?![\w#]+;)", "&amp;"

    # The character class previously contained literal '?' characters, so every
    # question mark in a value was turned into &quot;.
    # NOTE(review): the '?' run looks like mis-encoded curly quotes (U+201C/U+201D);
    # they are spelled as escapes here so the file encoding cannot mangle them again - confirm.
    $text = $text -replace '["\u201C\u201D]', "&quot;"

    $text = $text -replace "(?<!--)>", "&gt;"
    $text = $text -replace "<(?!!--)", "&lt;"

    return $text
}

# Build the dictionary XML from the script parameters and write it to the output file as UTF-8.
createDictionaryFromSPList -url $siteUrl -listName $listName -fieldName $columnName -addWordVariants $addWordVariants |
    Out-File -FilePath $output -Encoding "UTF8"