使用 HDFS 的简单 API 读写文件
import java.io.BufferedReader;import java.io.BufferedWriter;import java.io.FileInputStream;import java.io.FileWriter;import java.io.InputStreamReader;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FSDataInputStream;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;public class TestRead {/** * @param args * @throws Throwable */public static void main(String[] args) throws Throwable {String path = args[0];String confFile = args[1];Configuration conf = new Configuration(); FileInputStream fis = new FileInputStream(confFile);conf.addResource(fis);FileSystem fileSystem = FileSystem.get(conf);FSDataInputStream fs = fileSystem.open(new Path(path));BufferedReader bis = new BufferedReader(new InputStreamReader(fs,"GBK"));FileWriter fos = new FileWriter(args[2]);BufferedWriter bw = new BufferedWriter(fos); String temp; int i=0; while ((temp = bis.readLine()) != null) { bw.write(temp); System.out.println(temp); if(temp.startsWith("</doc>")) { break; } if(temp.indexOf("上海") > -1) { System.out.println("发现你了 coming"); break; } } bw.close(); bis.close(); fileSystem.close();}}?
需要注意的几点:
1:读写文件时最好都使用带缓冲的类(如 BufferedReader/BufferedWriter、BufferedInputStream/BufferedOutputStream)
2:如果按字节(byte)读取并原样写入文件,则不存在字符集问题
3:如果按字符读取,请注意字符集编码问题,例如这里指定按 GBK 编码读取:
InputStreamReader(fs,"GBK")