Flume 的 HTTPSource 默认使用 UTF-8 编码。Flume 服务部署在 Linux 机器上时没有问题,但部署在 Windows 机器上时,会出现中文乱码问题。下面从源码排查乱码出现在哪里:
1.在刚接收到请求的时候,打印requestBody里的内容,数据是正常的。
2.数据存到event里之后,打印event里的内容,出现乱码。
源码里有个JSONHandler的类处理了request的请求,然后将request的内容放到了event里。代码里都是用“utf-8”进行编码的,而windows机器默认的编码是“GBK”,猜测是这里的问题,将编码统一换成“GBK”中文不乱码了。将源码改动了下边两个地方。
package org.apache.flume.source.http;import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.google.gson.JsonSyntaxException; import com.google.gson.reflect.TypeToken; import org.apache.flume.Context; import org.apache.flume.Event; import org.apache.flume.event.EventBuilder; import org.apache.flume.event.JSONEvent; import org.slf4j.Logger; import org.slf4j.LoggerFactory;import javax.servlet.ServletInputStream; import javax.servlet.http.HttpServletRequest; import java.io.BufferedReader; import java.io.InputStreamReader; import java.lang.reflect.Type; import java.nio.charset.Charset; import java.nio.charset.UnsupportedCharsetException; import java.util.ArrayList; import java.util.List;public class JSONHandlerTest implements HTTPSourceHandler {private static final Logger LOG = LoggerFactory.getLogger(JSONHandlerTest.class);private final Type listType = new TypeToken<List<JSONEvent>>() {}.getType();private final Gson gson;public JSONHandlerTest() {gson = new GsonBuilder().disableHtmlEscaping().create();}/*** {@inheritDoc}*/@Overridepublic List<Event> getEvents(HttpServletRequest request) throws Exception {BufferedReader reader = request.getReader();String charset = request.getCharacterEncoding();//UTF-8 is default for JSON. If no charset is specified, UTF-8 is to//be assumed.if (charset == null) {LOG.debug("Charset is null, default charset of UTF-8 will be used.");charset = "UTF-8";} else if (!(charset.equalsIgnoreCase("utf-8")|| charset.equalsIgnoreCase("utf-16")|| charset.equalsIgnoreCase("utf-32"))) {LOG.error("Unsupported character set in request {}. 
"+ "JSON handler supports UTF-8, "+ "UTF-16 and UTF-32 only.", charset);throw new UnsupportedCharsetException("JSON handler supports UTF-8, "+ "UTF-16 and UTF-32 only.");}/** Gson throws Exception if the data is not parseable to JSON.* Need not catch it since the source will catch it and return error.*/List<Event> eventList = new ArrayList<Event>(0);try {eventList = gson.fromJson(reader, listType);} catch (JsonSyntaxException ex) {throw new HTTPBadRequestException("Request has invalid JSON Syntax.", ex);}for (Event e : eventList) {((JSONEvent) e).setCharset("GBK");}return getSimpleEvents(eventList);}@Overridepublic void configure(Context context) {}private List<Event> getSimpleEvents(List<Event> events) {Charset ch=Charset.forName("GBK");List<Event> newEvents = new ArrayList<Event>(events.size());for (Event e:events) { //生成event的时候,用“GBK”编码。newEvents.add(EventBuilder.withBody(new String (e.getBody()),ch,e.getHeaders()));}return newEvents;} }
个人观点,欢迎指正。