Elasticsearch Custom Analyzers
An analyzer tokenizes text and applies filters to the resulting tokens. The default (`standard`) analyzer doesn't fit every use case.
Custom Analyzer Components
Analyzer = Character filters (optional) + Tokenizer + Token filters (applied in the order listed)
PUT /my_index
{
  "settings": {
    "analysis": {
      // Tokenizer: splits the input wherever the regex matches.
      "tokenizer": {
        "my_tokenizer": {
          "type": "pattern",
          "pattern": "\\W+" // one or more non-word characters act as the separator
        }
      },
      // Token filter: each entry is a comma-separated group of equivalent terms.
      "filter": {
        "my_synonyms": {
          "type": "synonym",
          "synonyms": ["quick,fast", "jumps,hops"]
        }
      },
      // Analyzer: wires the tokenizer above to a filter chain.
      // Filters run in list order: lowercase first, then synonyms, then stop words.
      "analyzer": {
        "my_analyzer": {
          "tokenizer": "my_tokenizer",
          "filter": ["lowercase", "my_synonyms", "stop"]
        }
      }
    }
  }
}
Email Analyzer Example
PUT /users
{
  "settings": {
    "analysis": {
      "analyzer": {
        // Custom analyzer for email addresses: uax_url_email tokenizer + lowercasing.
        "email_analyzer": {
          "type": "custom",
          "tokenizer": "uax_url_email",
          "filter": ["lowercase"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      // "email" is analyzed with email_analyzer for full-text queries;
      // the "email.keyword" sub-field stores the unanalyzed value.
      "email": {
        "type": "text",
        "analyzer": "email_analyzer",
        "fields": {
          "keyword": { "type": "keyword" }
        }
      }
    }
  }
}
POST /users/_doc
{
  "email": "user@example.com"
}

GET /users/_search
{
  "query": {
    "match": {
      // The uax_url_email tokenizer keeps a complete address as ONE token,
      // so the query has to be the full address to match it.
      // A partial value such as "user@example" (no top-level domain) is not
      // recognized as an email; it is split into "user" and "example",
      // neither of which exists in the index, so it returns no hits.
      // For exact, unanalyzed lookups use a term query on "email.keyword".
      "email": "user@example.com"
    }
  }
}
Language Analyzers
PUT /articles
{
  "settings": {
    "analysis": {
      // Both analyzers are the `standard` analyzer configured with a predefined
      // stop-word list. They are not the built-in `english` / `russian`
      // language analyzers (which are selected with "type": "english" / "russian").
      "analyzer": {
        "english_stop": {
          "type": "standard",
          "stopwords": "_english_"
        },
        "russian_stop": {
          "type": "standard",
          "stopwords": "_russian_"
        }
      }
    }
  }
}







