当前位置: 代码迷 >> 数据仓库 >> Spring batch实现数据仓库ETL 框架搭建(1)
  详细解决方案

Spring batch实现数据仓库ETL 框架搭建(1)

热度:107   发布时间:2016-05-05 15:43:29.0
Spring batch实现数据仓库ETL 框架搭建(一)
暂时还没完成,只供自己参考
参考链接:
http://www.yihaomen.com/article/java/433.htm
http://www.zuidaima.com/share/1732772811131904.htm
http://13146489.iteye.com/blog/1412295

实现的具体功能:
step1 : 从 A 文件夹中读取csv 文件,处理之后,写到数据库中保存

App.java
package yihaomen;import java.util.HashMap;import java.util.Map;import java.util.Random;import org.springframework.batch.core.Job;import org.springframework.batch.core.JobExecution;import org.springframework.batch.core.JobParameter;import org.springframework.batch.core.JobParameters;import org.springframework.batch.core.launch.JobLauncher;import org.springframework.context.ApplicationContext;import org.springframework.context.support.ClassPathXmlApplicationContext;public class App {	public static void main(String[] args) {		String[] springConfig  = 			{					"spring/batch/jobs/job-hello-world.xml" 			};				ApplicationContext context = 				new ClassPathXmlApplicationContext(springConfig);				JobLauncher jobLauncher = (JobLauncher) context.getBean("jobLauncher");		Job job = (Job) context.getBean("helloWorldJob");		try {       			Map<String,JobParameter> parameters = new HashMap<String,JobParameter>();			parameters.put("RUN_MONTH_KEY", new JobParameter(Math.random()));//因为一个job parameter只能用一次,所以为了多次测试使用,所以我直接用随机数			JobExecution execution = jobLauncher.run(job, new JobParameters(parameters));			System.out.println("Exit Status : " + execution.getStatus());		} catch (Exception e) {			e.printStackTrace();		}		System.out.println("Done");	}}

CustomItemProcessor.java
package yihaomen;

import org.springframework.batch.item.ItemProcessor;

import yihaomen.model.Report;

/**
 * Pass-through processor: prints each {@link Report} it receives and hands
 * the very same instance on unchanged.
 */
public class CustomItemProcessor implements ItemProcessor<Report, Report> {

    /**
     * Prints the item and returns it.
     *
     * @param item the report read by the reader
     * @return the same {@code item} instance, unmodified
     */
    @Override
    public Report process(Report item) throws Exception {
        String message = "Processing..." + item;
        System.out.println(message);
        return item;
    }
}

ReportFieldSetMapper.java
package yihaomen;import java.text.ParseException;import java.text.SimpleDateFormat;import org.springframework.batch.item.file.mapping.FieldSetMapper;import org.springframework.batch.item.file.transform.FieldSet;import org.springframework.validation.BindException;import yihaomen.model.Report;public class ReportFieldSetMapper implements FieldSetMapper<Report> {	private SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");		@Override	public Report mapFieldSet(FieldSet fieldSet) throws BindException {				Report report = new Report();		report.setId(fieldSet.readInt(0));		report.setSales(fieldSet.readBigDecimal(1));		report.setQty(fieldSet.readInt(2));		report.setStaffName(fieldSet.readString(3));				//default format yyyy-MM-dd		//fieldSet.readDate(4);		String date = fieldSet.readString(4);		try {			report.setDate(dateFormat.parse(date));		} catch (ParseException e) {			e.printStackTrace();		}				return report;			}}

ReportWrite2Database.java
package yihaomen;

import java.util.List;

import org.springframework.batch.item.ItemWriter;

import yihaomen.model.Report;

/**
 * Writer placeholder for the report table: it currently only prints a start
 * banner, every item of the chunk, and an end banner to standard output.
 * No database access is performed by this class.
 */
public class ReportWrite2Database implements ItemWriter<Report> {

    /**
     * Prints the given chunk of reports, one per line, between two banners.
     *
     * @param items the chunk handed over by the step
     */
    @Override
    public void write(List<? extends Report> items) throws Exception {
        System.out.println("开始存储到数据库report表中.......");

        for (int index = 0; index < items.size(); index++) {
            Report current = items.get(index);
            System.out.println(current.toString());
        }

        System.out.println("存储完毕!.......");
    }
}

Report.java
package yihaomen.model;

import java.math.BigDecimal;
import java.util.Date;

import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;

/**
 * One sales record. Bound to the XML root element {@code record}; the id is
 * exposed as an attribute and sales, qty and staffName as named child elements.
 */
@XmlRootElement(name = "record")
public class Report {

    private int id;
    private BigDecimal sales;
    private int qty;
    private String staffName;
    private Date date;

    /** @return the record id */
    @XmlAttribute(name = "id")
    public int getId() {
        return this.id;
    }

    /** @param id the record id */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the sales amount */
    @XmlElement(name = "sales")
    public BigDecimal getSales() {
        return this.sales;
    }

    /** @param sales the sales amount */
    public void setSales(BigDecimal sales) {
        this.sales = sales;
    }

    /** @return the quantity */
    @XmlElement(name = "qty")
    public int getQty() {
        return this.qty;
    }

    /** @param qty the quantity */
    public void setQty(int qty) {
        this.qty = qty;
    }

    /** @return the staff name */
    @XmlElement(name = "staffName")
    public String getStaffName() {
        return this.staffName;
    }

    /** @param staffName the staff name */
    public void setStaffName(String staffName) {
        this.staffName = staffName;
    }

    /** @return the record date (the stored instance, not a copy) */
    public Date getDate() {
        return this.date;
    }

    /** @param date the record date (stored as given, not copied) */
    public void setDate(Date date) {
        this.date = date;
    }

    /**
     * Renders id, sales, qty and staffName; the date is not part of the text.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Report [id=");
        text.append(this.id);
        text.append(", sales=").append(this.sales);
        text.append(", qty=").append(this.qty);
        text.append(", staffName=").append(this.staffName);
        text.append("]");
        return text.toString();
    }
}

report.csv
1001,"213,100",980,yihaomen, 2013-01-01
1002,"320,200",1080,staff 1, 2013-01-01
1003,"342,197",1200,staff 2, 2013-01-01


context.xml
<beans xmlns="http://www.springframework.org/schema/beans"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="
        http://www.springframework.org/schema/beans
        http://www.springframework.org/schema/beans/spring-beans-3.1.xsd">

    <!-- Alternative (disabled): keep the job metadata in memory -->
    <!--
    <bean id="jobRepository"
        class="org.springframework.batch.core.repository.support.MapJobRepositoryFactoryBean">
        <property name="transactionManager" ref="transactionManager" />
    </bean>
    -->

    <!-- Active: keep the job metadata in the database behind the "dataSource" bean.
         That bean is not declared in this file and must be supplied by another
         configuration file. -->
    <bean id="jobRepository"
        class="org.springframework.batch.core.repository.support.JobRepositoryFactoryBean">
        <property name="dataSource" ref="dataSource" />
        <property name="transactionManager" ref="transactionManager" />
        <property name="databaseType" value="oracle" />
    </bean>

    <!-- NOTE(review): ResourcelessTransactionManager is combined here with a
         database-backed job repository; confirm that a transaction manager
         without real transactions is intended. -->
    <bean id="transactionManager"
        class="org.springframework.batch.support.transaction.ResourcelessTransactionManager" />

    <!-- Launcher used by the application to start jobs -->
    <bean id="jobLauncher"
        class="org.springframework.batch.core.launch.support.SimpleJobLauncher">
        <property name="jobRepository" ref="jobRepository" />
    </bean>

</beans>

database.xml
<beans xmlns="http://www.springframework.org/schema/beans"
    xmlns:jdbc="http://www.springframework.org/schema/jdbc"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="
        http://www.springframework.org/schema/beans
        http://www.springframework.org/schema/beans/spring-beans-3.1.xsd
        http://www.springframework.org/schema/jdbc
        http://www.springframework.org/schema/jdbc/spring-jdbc-3.1.xsd">

    <!-- Connection to the Oracle database.
         NOTE(review): the user name and password are stored here in plain
         text; consider moving them to an external, non-versioned source. -->
    <bean id="dataSource"
        class="org.springframework.jdbc.datasource.DriverManagerDataSource">
        <property name="driverClassName" value="oracle.jdbc.OracleDriver" />
        <property name="url" value="jdbc:oracle:thin:@localhost:1521:orcl" />
        <property name="username" value="evan" />
        <property name="password" value="880823" />
    </bean>

    <!-- NOTE(review): a bean with the id "transactionManager" is also declared
         in context.xml; confirm which of the two definitions should win. -->
    <bean id="transactionManager"
        class="org.springframework.batch.support.transaction.ResourcelessTransactionManager" />

    <!-- Disabled: create the job metadata tables automatically.
         The statements shipped with Spring Batch 2.2.3 are slightly broken:
         a "," has to be added to the last two. Copy the scripts out, fix them
         and run them by hand.
    <jdbc:initialize-database data-source="dataSource">
        <jdbc:script location="org/springframework/batch/core/schema-drop-oracle10g.sql" />
        <jdbc:script location="org/springframework/batch/core/schema-oracle10g.sql" />
    </jdbc:initialize-database>
    -->

</beans>

job-hello-world.xml
<beans xmlns="http://www.springframework.org/schema/beans"
    xmlns:batch="http://www.springframework.org/schema/batch"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="
        http://www.springframework.org/schema/batch
        http://www.springframework.org/schema/batch/spring-batch-2.2.xsd
        http://www.springframework.org/schema/beans
        http://www.springframework.org/schema/beans/spring-beans-3.1.xsd">

    <!-- Shared infrastructure: job repository, launcher and data source -->
    <import resource="../config/context.xml" />
    <import resource="../config/database.xml" />

    <!-- Prototype used only by the disabled BeanWrapperFieldSetMapper below -->
    <bean id="report" class="yihaomen.model.Report" scope="prototype" />

    <bean id="itemProcessor" class="yihaomen.CustomItemProcessor" />

    <!-- Single-step job: read the CSV file, process each record, write it out
         in chunks of 10 items -->
    <batch:job id="helloWorldJob">
        <batch:step id="step1">
            <batch:tasklet>
                <batch:chunk reader="cvsFileItemReader"
                    processor="itemProcessor"
                    writer="xmlItemWriter"
                    commit-interval="10">
                </batch:chunk>
            </batch:tasklet>
        </batch:step>
    </batch:job>

    <!-- Reader: tokenizes each line of report.csv into the five named columns
         and maps them with ReportFieldSetMapper -->
    <bean id="cvsFileItemReader" class="org.springframework.batch.item.file.FlatFileItemReader">
        <property name="resource" value="classpath:cvs/input/report.csv" />
        <property name="lineMapper">
            <bean class="org.springframework.batch.item.file.mapping.DefaultLineMapper">
                <property name="lineTokenizer">
                    <bean class="org.springframework.batch.item.file.transform.DelimitedLineTokenizer">
                        <property name="names" value="id,sales,qty,staffName,date" />
                    </bean>
                </property>
                <property name="fieldSetMapper">
                    <bean class="yihaomen.ReportFieldSetMapper" />

                    <!-- If no data type conversion is needed, BeanWrapperFieldSetMapper
                         can map the columns by name instead:
                    <bean
                        class="org.springframework.batch.item.file.mapping.BeanWrapperFieldSetMapper">
                        <property name="prototypeBeanName" value="report" />
                    </bean>
                    -->
                </property>
            </bean>
        </property>
    </bean>

    <!-- Disabled: original XML file writer
    <bean id="xmlItemWriter" class="org.springframework.batch.item.xml.StaxEventItemWriter">
        <property name="resource" value="file:xml/outputs/report.xml" />
        <property name="marshaller" ref="reportMarshaller" />
        <property name="rootTagName" value="report" />
    </bean>
    -->

    <!-- Active writer. The id is still "xmlItemWriter" although the class is
         ReportWrite2Database; the id is what the chunk above refers to. -->
    <bean id="xmlItemWriter" class="yihaomen.ReportWrite2Database">
    </bean>

    <!-- Referenced only by the disabled StaxEventItemWriter above -->
    <bean id="reportMarshaller" class="org.springframework.oxm.jaxb.Jaxb2Marshaller">
        <property name="classesToBeBound">
            <list>
                <value>yihaomen.model.Report</value>
            </list>
        </property>
    </bean>

</beans>

框架结构:




引用的jar包











运行结果:




  相关解决方案