TextCatalog.LatentDirichletAllocation Methode
Definition
Wichtig
Einige Informationen beziehen sich auf Vorabversionen, die vor dem Release ggf. grundlegend überarbeitet werden. Microsoft übernimmt hinsichtlich der hier bereitgestellten Informationen keine Gewährleistungen, seien sie ausdrücklich oder konkludent.
Erstellen Sie eine LatentDirichletAllocationEstimator, die LightLDA verwendet, um Text (dargestellt als Vektor von Floats) in einen Vektor zu transformieren, Single der die Ähnlichkeit des Texts mit jedem identifizierten Thema angibt.
public static Microsoft.ML.Transforms.Text.LatentDirichletAllocationEstimator LatentDirichletAllocation (this Microsoft.ML.TransformsCatalog.TextTransforms catalog, string outputColumnName, string inputColumnName = default, int numberOfTopics = 100, float alphaSum = 100, float beta = 0.01, int samplingStepCount = 4, int maximumNumberOfIterations = 200, int likelihoodInterval = 5, int numberOfThreads = 0, int maximumTokenCountPerDocument = 512, int numberOfSummaryTermsPerTopic = 10, int numberOfBurninIterations = 10, bool resetRandomGenerator = false);
static member LatentDirichletAllocation : Microsoft.ML.TransformsCatalog.TextTransforms * string * string * int * single * single * int * int * int * int * int * int * int * bool -> Microsoft.ML.Transforms.Text.LatentDirichletAllocationEstimator
<Extension()>
Public Function LatentDirichletAllocation (catalog As TransformsCatalog.TextTransforms, outputColumnName As String, Optional inputColumnName As String = Nothing, Optional numberOfTopics As Integer = 100, Optional alphaSum As Single = 100, Optional beta As Single = 0.01, Optional samplingStepCount As Integer = 4, Optional maximumNumberOfIterations As Integer = 200, Optional likelihoodInterval As Integer = 5, Optional numberOfThreads As Integer = 0, Optional maximumTokenCountPerDocument As Integer = 512, Optional numberOfSummaryTermsPerTopic As Integer = 10, Optional numberOfBurninIterations As Integer = 10, Optional resetRandomGenerator As Boolean = false) As LatentDirichletAllocationEstimator
Parameter
- catalog
- TransformsCatalog.TextTransforms
Der Katalog der Transformation.
- outputColumnName
- String
Name der Spalte, die aus der Transformation von inputColumnName
.
Dieser Schätzer gibt einen Vektor von Single.
- inputColumnName
- String
Name der zu transformierenden Spalte. Wenn dieser Wert als null
Quelle festgelegt ist, wird der Wert des Werts outputColumnName
als Quelle verwendet.
Dieser Schätzer wird über einen Vektor von Single.
- numberOfTopics
- Int32
Die Anzahl der Themen.
- alphaSum
- Single
Dirichlet vor Dokumentthemavektoren.
- beta
- Single
Dirichlet prior on vocab-topic vectors.
- samplingStepCount
- Int32
Anzahl der Metropolis Hasting-Schritt.
- maximumNumberOfIterations
- Int32
Anzahl von Iterationen.
- likelihoodInterval
- Int32
Berechnen der Protokollwahrscheinlichkeit über das lokale Dataset in diesem Iterationsintervall.
- numberOfThreads
- Int32
Die Anzahl der Schulungsthreads. Der Standardwert hängt von der Anzahl der logischen Prozessoren ab.
- maximumTokenCountPerDocument
- Int32
Der Schwellenwert für die maximale Anzahl von Token pro Dokument.
- numberOfSummaryTermsPerTopic
- Int32
Die Anzahl der Wörter, die das Thema zusammenfassen sollen.
- numberOfBurninIterations
- Int32
Die Anzahl der Burn-In-Iterationen.
- resetRandomGenerator
- Boolean
Setzen Sie den Zufallszahlengenerator für jedes Dokument zurück.
Gibt zurück
Beispiele
using System;
using System.Collections.Generic;
using Microsoft.ML;
namespace Samples.Dynamic
{
public static class LatentDirichletAllocation
{
public static void Example()
{
// Create a new ML context, for ML.NET operations. It can be used for
// exception tracking and logging, as well as the source of randomness.
var mlContext = new MLContext();
// Create a small dataset as an IEnumerable.
var samples = new List<TextData>()
{
new TextData(){ Text = "ML.NET's LatentDirichletAllocation API " +
"computes topic models." },
new TextData(){ Text = "ML.NET's LatentDirichletAllocation API " +
"is the best for topic models." },
new TextData(){ Text = "I like to eat broccoli and bananas." },
new TextData(){ Text = "I eat bananas for breakfast." },
new TextData(){ Text = "This car is expensive compared to last " +
"week's price." },
new TextData(){ Text = "This car was $X last week." },
};
// Convert training data to IDataView.
var dataview = mlContext.Data.LoadFromEnumerable(samples);
// A pipeline for featurizing the text/string using
// LatentDirichletAllocation API. o be more accurate in computing the
// LDA features, the pipeline first normalizes text and removes stop
// words before passing tokens (the individual words, lower cased, with
// common words removed) to LatentDirichletAllocation.
var pipeline = mlContext.Transforms.Text.NormalizeText("NormalizedText",
"Text")
.Append(mlContext.Transforms.Text.TokenizeIntoWords("Tokens",
"NormalizedText"))
.Append(mlContext.Transforms.Text.RemoveDefaultStopWords("Tokens"))
.Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens"))
.Append(mlContext.Transforms.Text.ProduceNgrams("Tokens"))
.Append(mlContext.Transforms.Text.LatentDirichletAllocation(
"Features", "Tokens", numberOfTopics: 3));
// Fit to data.
var transformer = pipeline.Fit(dataview);
// Create the prediction engine to get the LDA features extracted from
// the text.
var predictionEngine = mlContext.Model.CreatePredictionEngine<TextData,
TransformedTextData>(transformer);
// Convert the sample text into LDA features and print it.
PrintLdaFeatures(predictionEngine.Predict(samples[0]));
PrintLdaFeatures(predictionEngine.Predict(samples[1]));
// Features obtained post-transformation.
// For LatentDirichletAllocation, we had specified numTopic:3. Hence
// each prediction has been featurized as a vector of floats with length
// 3.
// Topic1 Topic2 Topic3
// 0.6364 0.2727 0.0909
// 0.5455 0.1818 0.2727
}
private static void PrintLdaFeatures(TransformedTextData prediction)
{
for (int i = 0; i < prediction.Features.Length; i++)
Console.Write($"{prediction.Features[i]:F4} ");
Console.WriteLine();
}
private class TextData
{
public string Text { get; set; }
}
private class TransformedTextData : TextData
{
public float[] Features { get; set; }
}
}
}