当前位置: 代码迷 >> 综合 >> elasticsearch7.x基操
  详细解决方案

elasticsearch7.x基操

热度:72   发布时间:2023-12-14 23:01:06.0

点我下载elastic和kibana

                                           Elasticsearch 7.x

1.Elasticsearch的概述

Elasticsearch(ES)是一个基于Lucene构建的开源、分布式、RESTful接口的全文搜索引擎。Elasticsearch还是一个分布式文档数据库,其中每个字段均可被索引,而且每个字段的数据均可被搜索,ES能够横向扩展至数以百计的服务器存储以及处理PB级的数据。可以在极短的时间内存储、搜索和分析大量的数据。通常作为具有复杂搜索场景情况下的核心发动机。

2.Elasticsearch安装

  1. 将下载的elasticsearch-7.15.0.tar.gz和kibana-7.15.0-linux-x86_64.tar.gz上传到linux的/usr/local下
  2. cd /usr/local
    tar -zxvf elasticsearch-7.15.0.tar.gz
    cd elasticsearch-7.15.0/config
    vim elasticsearch.yml
    //修改两条
    network.host: 192.168.212.151
    http.port: 9200
    //新增一条
    network.bind_host: 0.0.0.0
    执行命令wq保存退出
    vim /etc/sysctl.conf
    vm.max_map_count=655360
    执行命令wq保存退出
    sysctl -p
    vi /etc/security/limits.conf
    * soft nofile 65536
    * hard nofile 131072
    * soft nproc 2048
    * hard nproc 4096
    执行命令wq保存退出

    3.新建用户

groupadd esmayikt
useradd esyushengjun -g esmayikt -p 123456
chown -R esyushengjun:esmayikt  /usr/local/elasticsearch-7.15.0

       4.重启服务器

shutdown -r now

  5.开启防火墙端口9200(参考另一篇博客)

 6.运行elasticsearch

su esyushengjun
cd /usr/local/elasticsearch-7.15.0/bin
./elasticsearch 或者 nohup ./elasticsearch & 来启动

7.访问你的服务器ip端口9200

成功

如果报错说内存不足请修改config目录下的jvm.options下的-Xms和-Xmx(JVM堆内存大小),例如改为-Xms512m和-Xmx512m

重启elasticsearch

                                           kibana

 1.kibana介绍

 Kibana是一个针对Elasticsearch的开源分析及可视化平台,用来搜索、查看交互存储在Elasticsearch索引中的数据。使用Kibana,可以通过各种图表进行高级数据分析及展示。

2.kibana安装

     

cd /usr/local
tar -zxvf kibana-7.15.0-linux-x86_64.tar.gz
cd kibana-7.15.0-linux-x86_64/config/
vim kibana.yml
//修改以下三处(注意:kibana 7.x中elasticsearch.url已被移除,需使用elasticsearch.hosts)
server.port: 5601
server.host: "0.0.0.0"
elasticsearch.hosts: ["http://127.0.0.1:9200"]
//保存退出
cd /usr/local/kibana-7.15.0-linux-x86_64/bin
//启动
./kibana

访问服务器的http://192.168.43.167:5601

3.kibana操作elasticsearch数据

对数据进行操作是在菜单Dev Tools中进行,第一次进入需要点击  进入工作

基本命令如下

# 查询所有数据,加上"track_total_hits": true,可查看总条数,sort排序
GET _search
{"track_total_hits": true,"query": {"match_all": {}},"sort": [{"createTime.keyword": "desc"}]
}
# 查询所有索引
GET /_cat/indices?v
GET /_cat/indices?v&h=health,status,index
##新增索引
PUT /users
##查询索引
GET /users/_mapping
# 删除索引
DELETE /household_person
##新增索引
PUT /building_back
#复制building索引字段为building_back1
POST /_reindex
{"source": {"index": "building"},"dest": {"index": "building_back1","op_type": "create"}
}
#拷贝building_back1的数据到building_back
POST /_reindex
{"source": {"index": "building_back1"},"dest": {"index": "building_back"}
}
##给索引创建字段
PUT /building_back/_mapping
{"properties" : {"@timestamp" : {"type" : "date"},"@version" : {"type" : "text","fields" : {"keyword" : {"type" : "keyword","ignore_above" : 256}}},"village" : {"type" : "text","fields" : {"keyword" : {"type" : "keyword","ignore_above" : 256}}},"wgs_x" : {"type" : "text","fields" : {"keyword" : {"type" : "keyword","ignore_above" : 256}}},"wgs_y" : {"type" : "text","fields" : {"keyword" : {"type" : "keyword","ignore_above" : 256}}},"x" : {"type" : "text","fields" : {"keyword" : {"type" : "keyword","ignore_above" : 256}}},"y" : {"type" : "text","fields" : {"keyword" : {"type" : "keyword","ignore_above" : 256}}}}}
#查询数据
GET /users/_search
{"query": {"match_all": {}}
}
#字段查询 match模糊匹配
GET users/_search
{"from":0,"size":2,"query": {"match": {"city":"上海"}}
}
##term,添加 .keyword 查询时精确匹配,按username查找
GET /users/_search
{"size": 20,"from": 0,"query": {"terms": {"username.keyword": ["东方","公羊从"]}}
}
##term,添加 .keyword 查询时精确匹配,类似select id,grid_id from company_base where grid_id in ('d19bae4cbff94930af2cf54fe36eae60','51131ed432e04260a9b434b6fa181389') and tag='03020103,0301_tag' and tag != '' and id like '%e83844ac2a7655afa5b49bc3c81%' and deleted = 0;
GET /company_base/_search
{"size": 3000,"from": 0,"query": {"bool": {"must": [{"terms": {"grid_id.keyword": ["d19bae4cbff94930af2cf54fe36eae60","51131ed432e04260a9b434b6fa181389","034f79e1ebe34e29bf9aa42846e0a4a9","8f829c0c1ead4b419c5763e95f11984c","5cd443a286c94ece9919abd00afd248b"]}},{"match": {"deleted": {"query": 0,"operator": "OR","prefix_length": 0,"max_expansions": 50,"fuzzy_transpositions": true,"lenient": false,"zero_terms_query": "NONE","auto_generate_synonyms_phrase_query": true,"boost": 1}}},{"wildcard": {"id.keyword": {"value": "*e83844ac2a7655afa5b49bc3c81*"}}}] ,"must_not": [{"term": {"tag.keyword": ""}}]}},"_source": ["id","grid_id"]
}
#根据id查询某条数据
GET /users/_doc/2
#只获取字段city,username
GET /users/_doc/1?_source=city,username
# 聚集查询 id为1,2的数据
GET /users/_doc/_mget
{
"docs":[{"_id": 2},{"_id": 1}]
}
##查询id为1或2的数据
GET /users/_mget
{"ids":[1,2]
}
#复杂查询
GET /users/_doc/_search
{"query": {"bool": {"must": [{"match_all": {}}],"must_not": [],"should": []}},"from": 0,"size": 100,"sort": [],"aggs": {}
}
####过滤count大于10并且小于等于20
####第一条到第十条
####只展示username和count字段
GET /users/_search
{"query": {"bool": {"filter": {"range": {"count": {"gt": 10,"lte": 20}}}}},"sort": [{"count": {"order": "desc"}}],"from": 0,"size": 10,"_source": ["username","count"]
}
#添加一条数据(id已存在时是修改)
PUT /users/_doc/1
{"city": "上海","count": 8,"description": "测试数据2","faceId": "2","id": 1,"logintime": 1625814423047,"username": "gyt"
}
#根据id删除某条数据
DELETE /users/_doc/1
# 批量插入多个document,_id不指定则系统生成字符串
POST /users/_doc/_bulk
{"index":{"_id":2}}
{"city": "上海","count": 8, "description": "测试数据2", "faceId": "2", "logintime": 1625814423047, "username": "test1" }
{"index":{}}
{ "city": "上海",  "count": 8, "description": "测试数据2", "faceId": "2",  "logintime": 1625814423047, "username": "test2"}
# 批量操作(包含修改和删除)
POST /users/_doc/_bulk
{"update":{"_id":"1"}}
{"doc":{"city":"北京"}}
{"delete":{"_id":"2"}}
# 根据条件批量修改数据
POST /users/_update_by_query
{"query": {"terms": {"username.keyword": ["东方","公羊从"]}},"script": {"source": "ctx._source.city='拉萨'"}
}
# 根据条件批量删除数据
POST /users/_delete_by_query
{"query": {"terms": {"username.keyword": ["戚宛","欧阳"]}}
}
# 根据条件批量修改数据或者新增字段
POST /building/_update_by_query
{"query": {"match_all": {}},"script": {"source": "ctx._source['deleted'] = '0'"}
}
#设置es建筑物分页索引的最大查询限制
PUT building/_settings
{"index":{"max_result_window":4000000}
}
#查询roadandlanes字段为空的数据
GET /house_base/_search
{"size": 20,"from": 0,"query": {"bool": {"must": [{"terms": {"is_complete.keyword": ["1"]}}],"must_not": [{"exists": {"field": "roadandlanes"}}]}},"_source": ["roadandlanes","is_complete","unit"]
}
#_update_by_query的使用博客: https://blog.csdn.net/UbuntuTouch/article/details/105564270    https://www.cnblogs.com/sanduzxcvbnm/p/12719660.html
#根据点位查询附近的数据,并返回距离
GET event/_search
{"query": {"bool": {"must": {"match_all": {}},"filter": {"geo_distance": {"distance": "300m","distance_type": "arc","location": {"lat": 32.0384901759559,"lon": 118.79973514715272}}}}},"sort": [{"_geo_distance": {"location": {"lat": 32.0384901759559,"lon": 118.79973514715272},"order": "desc","unit": "m","distance_type": "arc"}}],"_source": ["describle","report_time"]
}
#根据时间段和用分词器相似度查询
GET event/_search
{"query": {"bool": {"must": [{"match": {"describle": {"query": "情形测试|安全隐患|安全生产|null|情形测试|123123|江苏省南京市秦淮区瑞金路街道金城大厦","operator": "OR","analyzer": "ik_smart","prefix_length": 0,"max_expansions": 50,"minimum_should_match": "80%","fuzzy_transpositions": true,"lenient": false,"zero_terms_query": "NONE","auto_generate_synonyms_phrase_query": true,"boost": 1}}},{"range": {"report_time": {"from": "2022-08-10 00:00:00","to": "2022-08-14 23:59:59","include_lower": true,"include_upper": true,"time_zone": "+08:00","format": "yyyy-MM-dd HH:mm:ss","boost": 1}}}],"adjust_pure_negative": true,"boost": 1}},"_source": ["describle","report_time"]
}

4.ik分词器安装

下载地址

1.将下载好的elasticsearch-analysis-ik-6.4.3.rar解压后重命名为ik(注意:ik分词器的版本必须与elasticsearch版本完全一致;本文安装的是elasticsearch 7.15.0,请下载对应的7.15.0版本,并把下文路径中的6.4.3替换为实际版本)

2.将ik文件夹上传到服务器的 /usr/local/elasticsearch-6.4.3/plugins 

3.创建目录

mkdir /usr/local/elasticsearch-6.4.3/plugins/ik/config/custom

4.新建字典

vi /usr/local/elasticsearch-6.4.3/plugins/ik/config/custom/new_word.dic

新增内容之后就可以wq保存了

5.修改ik配置,指向自定义的字典(在IKAnalyzer.cfg.xml中将扩展字典配置为 <entry key="ext_dict">custom/new_word.dic</entry>)

vi /usr/local/elasticsearch-6.4.3/plugins/ik/config/IKAnalyzer.cfg.xml

保存退出

重启elasticsearch

访问http://182.61.146.9:9200/_analyze会发现王者荣耀也算一个词了

5.es数据迁移工具elasticdump的使用

LINUX的安装:
wget https://nodejs.org/dist/v10.23.1/node-v10.23.1-linux-x64.tar.xz
tar xf node-v10.23.1-linux-x64.tar.xz
mv node-v10.23.1-linux-x64 /usr/local
ln -s /usr/local/node-v10.23.1-linux-x64/bin/npm /usr/local/bin/npm
ln -s /usr/local/node-v10.23.1-linux-x64/bin/node /usr/local/bin/node
npm init -f
npm install elasticdump
# 这里没有安装到全局,需要到node-v10.23.1-linux-x64/node_modules目录下才能找到 elasticdump

  WINDOWS的安装(已安装nodejs):


  工具使用: 


npm install elasticdump -g
#-g表示全局可用,直接在终端输入 elasticdump --version,出现版本信息即表示安装成功,如下
C:\Users\T470s>elasticdump --version
6.3.3 

 

1

elasticdump  --input=http://localhost:9200/demo --output=D:/ES/date/demo.json

1、将索引中的数据导出到本地

2、将本地数据导入es中

1

elasticdump  --input=D:/ES/date/demo.json --output=http://localhost:9200/demo1

3、将es导入另一个es

elasticdump --input=http://ip:9200/demo --output=http://127.0.0.1:9200/demo
#在--input参数和--output参数的url中添加账号密码
# 例如
elasticdump \
  --input=http://input-username:input-password@input.es.com:9200/my_index \
  --output=http://output-username:output-password@output.es.com:9200/my_index \
  --type=data --limit=10000

#拷贝analyzer分词
elasticdump \
  --input=http://production.es.com:9200/my_index \
  --output=http://staging.es.com:9200/my_index \
  --type=analyzer
#拷贝映射
elasticdump \
  --input=http://production.es.com:9200/my_index \
  --output=http://staging.es.com:9200/my_index \
  --type=mapping
#拷贝数据
elasticdump \
  --input=http://production.es.com:9200/my_index \
  --output=http://staging.es.com:9200/my_index \
  --type=data

点击跳转elasticdump  官网:

点击下载node.js linux安装包:

我上传的离线安装包:

命令说明: 

elasticdump: Import and export tools for elasticsearch
version: %%version%%
Usage: elasticdump --input SOURCE --output DESTINATION [OPTIONS]
--input  Source location (required)
--input-indexSource index and type(default: all, example: index/type)
--outputDestination location (required)
--output-indexDestination index and type(default: all, example: index/type)
--overwriteOverwrite output file if it exists(default: false)                    
--limitHow many objects to move in batch per operationlimit is approximate for file streams(default: 100)
--sizeHow many objects to retrieve(default: -1 -> no limit)
--concurrencyThe maximum number of requests the can be made concurrently to a specified transport.(default: 1)       
--concurrencyIntervalThe length of time in milliseconds in which up to <intervalCap> requests can be madebefore the interval request count resets. Must be finite.(default: 5000)       
--intervalCapThe maximum number of transport requests that can be made within a given <concurrencyInterval>.(default: 5)
--carryoverConcurrencyCountIf true, any incomplete requests from a <concurrencyInterval> will be carried over tothe next interval, effectively reducing the number of new requests that can be createdin that next interval.  If false, up to <intervalCap> requests can be created in thenext interval regardless of the number of incomplete requests from the previous interval.(default: true)                                                                                       
--throttleIntervalDelay in milliseconds between getting data from an inputTransport and sending it to anoutputTransport.(default: 1)
--debugDisplay the elasticsearch commands being used(default: false)
--quietSuppress all messages except for errors(default: false)
--typeWhat are we exporting?(default: data, options: [settings, analyzer, data, mapping, policy, alias, template, component_template, index_template])
--filterSystemTemplatesWhether to remove metrics-*-* and logs-*-* system templates (default: true])
--templateRegexRegex used to filter templates before passing to the output transport (default: ((metrics|logs|\\..+)(-.+)?)
--deleteDelete documents one-by-one from the input as they aremoved.  Will not delete the source index(default: false)
--searchBodyPreform a partial extract based on search resultswhen ES is the input, default values areif ES > 5`'{"query": { "match_all": {} }, "stored_fields": ["*"], "_source": true }'`else`'{"query": { "match_all": {} }, "fields": ["*"], "_source": true }'`[As of 6.68.0] If the searchBody is preceded by a @ symbol, elasticdump will perform a file lookupin the location specified. NB: File must contain valid JSON
--searchWithTemplateEnable to use Search Template when using --searchBodyIf using Search Template then searchBody has to consist of "id" field and "params" objectsIf "size" field is defined within Search Template, it will be overridden by --size parameterSee https://www.elastic.co/guide/en/elasticsearch/reference/current/search-template.html for further information(default: false)
--headersAdd custom headers to Elastisearch requests (helpful whenyour Elasticsearch instance sits behind a proxy)(default: '{"User-Agent": "elasticdump"}')Type/direction based headers are supported .i.e. input-headers/output-headers (these will only be added based on the current flow type input/output)
--paramsAdd custom parameters to Elastisearch requests uri. Helpful when you for examplewant to use elasticsearch preference--input-params is a specific params extension that can be used when fetching data with the scroll api--output-params is a specific params extension that can be used when indexing data with the bulk index apiNB : These were added to avoid param pollution problems which occur when an input param is used in an output source(default: null)
--sourceOnlyOutput only the json contained within the document _sourceNormal: {"_index":"","_type":"","_id":"", "_source":{SOURCE}}sourceOnly: {SOURCE}(default: false)
--ignore-errorsWill continue the read/write loop on write error(default: false)
--scrollIdThe last scroll Id returned from elasticsearch. This will allow dumps to be resumed used the last scroll Id &`scrollTime` has not expired.
--scrollTime  Time the nodes will hold the requested search in order.(default: 10m)
--scroll-with-post  Use a HTTP POST method to perform scrolling instead of the default GET(default: false)
--maxSockets  How many simultaneous HTTP requests can we process make?(default:5 [node <= v0.10.x] /Infinity [node >= v0.11.x] )
--timeoutInteger containing the number of milliseconds to wait fora request to respond before aborting the request. Passeddirectly to the request library. Mostly used when you don'tcare too much if you lose some data when importingbut rather have speed.
--offsetInteger containing the number of rows you wish to skipahead from the input transport.  When importing a largeindex, things can go wrong, be it connectivity, crashes,someone forgetting to `screen`, etc.  This allows youto start the dump again from the last known line written(as logged by the `offset` in the output).  Please beadvised that since no sorting is specified when thedump is initially created, there's no real way toguarantee that the skipped rows have already beenwritten/parsed.  This is more of an option for whenyou want to get most data as possible in the indexwithout concern for losing some rows in the process,similar to the `timeout` option.(default: 0)
--noRefreshDisable input index refresh.Positive:1. Much increase index speed2. Much less hardware requirementsNegative:1. Recently added data may not be indexedRecommended to use with big data indexing,where speed and system health in a higher prioritythan recently added data.
--inputTransportProvide a custom js file to use as the input transport
--outputTransportProvide a custom js file to use as the output transport
--toLogWhen using a custom outputTransport, should log linesbe appended to the output stream?(default: true, except for `$`)
--transformA method/function which can be called to modify documentsbefore writing to a destination. A global variable 'doc'is available.Example script for computing a new field 'f2' as doubledvalue of field 'f1':doc._source["f2"] = doc._source.f1 * 2;May be used multiple times.Additionally, transform may be performed by a module. See [Module Transform](#module-transform) below.
--awsChain  Use [standard](https://aws.amazon.com/blogs/security/a-new-and-standardized-way-to-manage-credentials-in-the-aws-sdks/) location and ordering for resolving credentials including environment variables, config files, EC2 and ECS metadata locations. _Recommended option for use with AWS_
--awsAccessKeyId
--awsSecretAccessKeyWhen using Amazon Elasticsearch Service protected byAWS Identity and Access Management (IAM), provideyour Access Key ID and Secret Access Key.--sessionToken can also be optionally provided if using temporary credentials
--awsIniFileProfileAlternative to --awsAccessKeyId and --awsSecretAccessKey,loads credentials from a specified profile in aws ini file.For greater flexibility, consider using --awsChainand setting AWS_PROFILE and AWS_CONFIG_FILEenvironment variables to override defaults if needed
--awsIniFileNameOverride the default aws ini file name when using --awsIniFileProfileFilename is relative to ~/.aws/(default: config)
--awsServiceSets the AWS service that the signature will be generated for(default: calculated from hostname or host)
--awsRegionSets the AWS region that the signature will be generated for(default: calculated from hostname or host)
--awsUrlRegexOverrides the default regular expression that is used to validate AWS urls that should be signed(default: ^https?:\/\/.*\.amazonaws\.com.*$)
--support-big-int   Support big integer numbers
--big-int-fields   Specifies a comma-separated list of fields that should be checked for big-int support(default '')
--retryAttempts  Integer indicating the number of times a request should be automatically re-attempted before failing when a connection fails with one of the following errors `ECONNRESET`, `ENOTFOUND`, `ESOCKETTIMEDOUT`, `ETIMEDOUT`, `ECONNREFUSED`, `EHOSTUNREACH`, `EPIPE`, `EAI_AGAIN`(default: 0)
--retryDelay   Integer indicating the back-off/break period between retry attempts (milliseconds)(default : 5000)
--parseExtraFieldsComma-separated list of meta-fields to be parsed  
--maxRowssupports file splitting.  Files are split by the number of rows specified
--fileSizesupports file splitting.  This value must be a string supported by the **bytes** module.     The following abbreviations must be used to signify size in terms of units         b for byteskb for kilobytesmb for megabytesgb for gigabytestb for terabytese.g. 10mb / 1gb / 1tbPartitioning helps to alleviate overflow/out of memory exceptions by efficiently segmenting filesinto smaller chunks that then be merged if needs be.
--fsCompressgzip data before sending output to file.On import the command is used to inflate a gzipped file
--s3AccessKeyIdAWS access key ID
--s3SecretAccessKeyAWS secret access key
--s3RegionAWS region
--s3Endpoint        AWS endpoint can be used for AWS compatible backends such asOpenStack Swift and OpenStack Ceph
--s3SSLEnabled      Use SSL to connect to AWS [default true]
--s3ForcePathStyle  Force path style URLs for S3 objects [default false]
--s3Compress  gzip data before sending to s3
--s3ServerSideEncryptionEnables encrypted uploads
--s3SSEKMSKeyIdKMS Id to be used with aws:kms uploads                    
--s3ACL  S3 ACL: private | public-read | public-read-write | authenticated-read | aws-exec-read | bucket-owner-read | bucket-owner-full-control [default private]
--retryDelayBase  The base number of milliseconds to use in the exponential backoff for operation retries. (s3)
--customBackoffActivate custom customBackoff function. (s3)
--tlsAuthEnable TLS X509 client authentication
--cert, --input-cert, --output-certClient certificate file. Use --cert if source and destination are identical.Otherwise, use the one prefixed with --input or --output as needed.
--key, --input-key, --output-keyPrivate key file. Use --key if source and destination are identical.Otherwise, use the one prefixed with --input or --output as needed.
--pass, --input-pass, --output-passPass phrase for the private key. Use --pass if source and destination are identical.Otherwise, use the one prefixed with --input or --output as needed.
--ca, --input-ca, --output-caCA certificate. Use --ca if source and destination are identical.Otherwise, use the one prefixed with --input or --output as needed.
--inputSocksProxy, --outputSocksProxySocks5 host address
--inputSocksPort, --outputSocksPortSocks5 host port
--handleVersionTells elastisearch transport to handle the `_version` field if present in the dataset(default : false)
--versionTypeElasticsearch versioning types. Should be `internal`, `external`, `external_gte`, `force`.NB : Type validation is handled by the bulk endpoint and not by elasticsearch-dump
--csvDelimiter        The delimiter that will separate columns.(default : ',')
--csvFirstRowAsHeaders        If set to true the first row will be treated as the headers.(default : true)
--csvRenameHeaders        If you want the first line of the file to be removed and replaced by the one provided in the `csvCustomHeaders` option(default : true)
--csvCustomHeaders  A comma-seperated listed of values that will be used as headers for your data. This param mustbe used in conjunction with `csvRenameHeaders`(default : null)
--csvWriteHeaders   Determines if headers should be written to the csv file.(default : true)
--csvIgnoreEmpty        Set to true to ignore empty rows. (default : false)
--csvSkipLines        If number is > 0 the specified number of lines will be skipped.(default : 0)
--csvSkipRows        If number is > 0 then the specified number of parsed rows will be skippedNB:  (If the first row is treated as headers, they aren't a part of the count)(default : 0)
--csvMaxRows        If number is > 0 then only the specified number of rows will be parsed.(e.g. 100 would return the first 100 rows of data)(default : 0)
--csvTrim        Set to true to trim all white space from columns.(default : false)
--csvRTrim        Set to true to right trim all columns.(default : false)
--csvLTrim        Set to true to left trim all columns.(default : false)   
--csvHandleNestedData        Set to true to handle nested JSON/CSV data. NB : This is a very optioninated implementaton !(default : false)
--csvIdColumn        Name of the column to extract the record identifier (id) fromWhen exporting to CSV this column can be used to override the default id (@id) column name(default : null)   
--csvIndexColumn        Name of the column to extract the record index fromWhen exporting to CSV this column can be used to override the default index (@index) column name(default : null)
--csvTypeColumn        Name of the column to extract the record type fromWhen exporting to CSV this column can be used to override the default type (@type) column name(default : null)              
--helpThis page