// A highlighted block
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
 * Reads a text file from HDFS line by line.
 *
 * <p>This is a best-effort read: any exception raised while resolving the
 * file system, opening the file, reading, or closing is printed to standard
 * error and not rethrown. The lines collected before the failure (possibly
 * none) are returned.
 *
 * @param hdfsTxtPath path/URI of the text file; used both to resolve the
 *                    file system and to open the file
 * @param conf        Hadoop configuration passed to {@code FileSystem.get}
 * @return a mutable list with one entry per line read, in file order;
 *         never {@code null}
 */
public static List<String> getHdfsTxt(String hdfsTxtPath, Configuration conf) {
    List<String> lines = new ArrayList<>();
    try {
        // The FileSystem is deliberately not closed here: FileSystem.get can
        // hand back a shared, cached instance that other callers may be using.
        FileSystem fs = FileSystem.get(URI.create(hdfsTxtPath), conf);

        // try-with-resources closes the reader and the underlying HDFS stream
        // on every exit path, including a failure while wrapping the stream.
        try (FSDataInputStream in = fs.open(new Path(hdfsTxtPath));
             BufferedReader reader = new BufferedReader(
                     // Explicit charset so decoding does not depend on the
                     // JVM's platform default.
                     new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        }
    } catch (Exception e) {
        // Intentionally broad: callers rely on this method never throwing
        // (bad path, missing file, and I/O errors all yield a partial list).
        e.printStackTrace();
    }
    return lines;
}
Java 逐行读取 HDFS 文件
最新推荐文章于 2026-06-24 23:43:29 发布
该代码段展示了一个Java方法,用于从Hadoop HDFS(Hadoop Distributed File System)中读取指定路径的文本文件内容,并将内容存储到一个List<String>中。方法使用了Apache Hadoop的FileSystem API来打开并读取文件,通过BufferedReader逐行读取并添加到列表。

1333

被折叠的 条评论
为什么被折叠?



