scan的调用代码示例如下:
// 创建HBase配置config
Configuration config = HBaseConfiguration.create();
config.set("hbase.zookeeper.quorum", "192.168.1.226");// zookeeper部署的服务器IP
config.set("hbase.zookeeper.property.clientPort", "2181");// zookeeper允许连接的客户端端口号
// 定义HBase连接
Connection connection = null;
try {
// 获取HBase数据库连接
connection = ConnectionFactory.createConnection(config);
// 输出连接建立结果
while (!connection.isClosed()) {
break;
}
// 获取HBase数据库表
Table table = connection.getTable(TableName.valueOf("test_table"));
// 构造Scan实例
Scan scan = new Scan();
scan.addColumn(Bytes.toBytes("family1"), Bytes.toBytes("cloumn1"));
// 获取查询结果
ResultScanner result = table.getScanner(scan);
// 解析查询结果
for (Result r : result) {
// 此处为处理Result的代码
byte[] row = r.getRow();
if(row.length == 0){
//...
}
}
result.close();
table.close();
table = null;
scan = null;
} catch (IOException e) {
e.printStackTrace();
} finally {
if (connection != null) {
try {
connection.close();
} catch (IOException e) {
e.printStackTrace();
}
}
} 下面,我们对scan的整个流程进行分析。
首先从Table的getScanner(Scan scan)方法入手,它的定义如下:
/**
* Returns a scanner on the current table as specified by the {@link Scan}
* object.
* Note that the passed {@link Scan}'s start row and caching properties
* may be changed.
*
* @param scan A configured {@link Scan} object.
* @return A scanner.
* @throws IOException if a remote or network exception occurs.
* @since 0.20.0
*/
ResultScanner getScanner(Scan scan) throws IOException; 它的实现是由HTable来完成的,源码如下:
/**
 * Entry point for scans in the HBase client: picks and constructs the
 * {@link ResultScanner} implementation that matches the flags set on {@code scan}.
 * The underlying {@link HTable} must not be closed;
 * {@link HTableInterface#getScanner(Scan)} has other usage details.
 *
 * <p>Side effect: when {@code scan} carries no positive caching or max-result-size
 * value, this method writes the table-level values into it before building the scanner.
 *
 * @param scan the scan to execute; may be modified as described above
 * @return a scanner of the type selected by {@code scan.isReversed()} and {@code scan.isSmall()}
 * @throws IllegalArgumentException if {@code scan} is small and also has a batch size above zero
 * @throws IOException declared by the scanner constructors
 */
@Override
public ResultScanner getScanner(final Scan scan) throws IOException {
  // Batching and "small" scans are mutually exclusive.
  if (scan.getBatch() > 0 && scan.isSmall()) {
    throw new IllegalArgumentException("Small scan should not be used with batching");
  }

  // Fill in caching from the table-level setting when the scan does not specify one.
  if (scan.getCaching() <= 0) {
    scan.setCaching(getScannerCaching());
  }
  // Likewise for the maximum result size.
  if (scan.getMaxResultSize() <= 0) {
    scan.setMaxResultSize(scannerMaxResultSize);
  }

  // Four scanner flavours, selected by (reversed, small):
  //   reversed  + small -> ClientSmallReversedScanner
  //   reversed  + big   -> ReversedClientScanner
  //   forward   + small -> ClientSmallScanner
  //   forward   + big   -> ClientScanner
  final boolean reversed = scan.isReversed();
  final boolean small = scan.isSmall();

  if (reversed && small) {
    return new ClientSmallReversedScanner(getConfiguration(), scan, getName(),
        this.connection, this.rpcCallerFactory, this.rpcControllerFactory,
        pool, tableConfiguration.getReplicaCallTimeoutMicroSecondScan());
  }
  if (reversed) {
    return new ReversedClientScanner(getConfiguration(), scan, getName(),
        this.connection, this.rpcCallerFactory, this.rpcControllerFactory,
        pool, tableConfiguration.getReplicaCallTimeoutMicroSecondScan());
  }
  if (small) {
    return new ClientSmallScanner(getConfiguration(), scan, getName(),
        this.connection, this.rpcCallerFactory, this.rpcControllerFactory,
        pool, tableConfiguration.getReplicaCallTimeoutMicroSecondScan());
  }
  return new ClientScanner(getConfiguration(), scan, getName(), this.connection,
      this.rpcCallerFactory, this.rpcControllerFactory,
      pool, tableConfiguration.getReplicaCallTimeoutMicroSecondScan());
} 这里,我们先只研究ClientScanner,其他三种以后再说。
/**
* Create a new ClientScanner for the specified table Note that the passed {@link Scan}'s start
* row maybe changed changed.
* @param conf The {@link Configuration} to use.
* @param scan {@link Scan} to use in this scanner
* @param tableName The table that we wish to scan
* @param connection Connection identifying the cluster
* @throws IOException
*/
public ClientScanner(final Configuration conf, final Scan scan, final TableName tableName,
ClusterConnection connection, RpcRetryingCallerFactory rpcFactory,
RpcControllerFactory controllerFactory, ExecutorService pool, int primaryOperationTimeout) throws IOException {
if (LOG.isTraceEnabled()) {
LOG.trace("Scan table=" + tableName
+ ", startRow=" + Bytes.toStringBinary(scan.getStartRow()));
}
// 设置scan、tableName等成员变量
this.scan = scan;
this.tableName = tableName;
this.lastNext = System.currentTimeMillis();
this.connection = connection;
this.pool = pool;
this.primaryOperationTimeout = primaryOperationTimeout;
// 重试次数,取参数“hbase.client.retries.number”,如果参数未配置,则默认为31
this.retries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER);
if (scan.getMaxResultSize() > 0) {
this.maxScannerResultSize = scan.getMaxResultSize();
} else {
this.maxScannerResultSize = conf.getLong(
HConstants.HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE_KEY,
HConstants.DEFAULT_HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE);
}
// scanner超时时间
this.scannerTimeout = HBaseConfiguration.getInt(conf,
HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD,
HConstants.HBASE_REGIONSERVER_LEASE_PERIOD_KEY,
HConstants.DEFAULT_HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD);
// check if application wants to collect scan metrics
initScanMetrics(scan);
// Use the caching from the Scan. If not set, use the default cache setting for this table.
// 处理caching
if (this.scan.getCaching() > 0) {
this.caching = this.scan.getCaching();
} else {
this.caching = conf.getInt(
HConstants.HBASE_CLIENT_SCANNER_CACHING,
HConstants.DEFAULT_HBASE_CLIENT_SCANNER_CACHING);
}
// 初始化caller
// caller为RpcRetryingCaller类型
this.caller = rpcFactory.<Result[]> newCaller();
// rpcControllerFactory为RpcControllerFactory类型
this.rpcControllerFactory = controllerFactory;
this.conf = conf;
// 初始化scanner
initializeScannerInConstruction();
} ClientScanner的构造方法中,首先是对各种成员变量赋值,比如scan、tableName、connection等,然后是处理maxScannerResultSize、scannerTimeout、caching等scan需要用到的各种参数,这些都没有什么好说的。
接下来,是初始化两个重要的变量caller和rpcControllerFactory,caller为RpcRetryingCaller类型的,rpcControllerFactory为RpcControllerFactory类型的。
最后调用initializeScannerInConstruction()方法完成scanner的初始化,下面我们继续跟进这个方法。
/**
 * Initializes the scanner during construction by delegating to
 * {@code nextScanner} with this scanner's caching value and {@code done = false}.
 *
 * @throws IOException if {@code nextScanner} fails
 */
protected void initializeScannerInConstruction() throws IOException {
  final int rowsPerFetch = this.caching;
  final boolean serverSaysDone = false;
  nextScanner(rowsPerFetch, serverSaysDone);
} 紧接着,调用nextScanner()方法,注意,传入两个参数,一个是ClientScanner对象生成时的caching,另外一个是false。
这个caching,如果在构造Scan对象时没有设置,则取参数hbase.client.scanner.caching配置的值,参数未配置则默认为100,它的含义是每次RPC请求的最大行数。
继续追踪nextScanner()方法,完整的代码如下:
/*
* Gets a scanner for the next region. If this.currentRegion != null, then
* we will move to the endrow of this.currentRegion. Else we will get
* scanner at the scan.getStartRow(). We will go no further, just tidy
* up outstanding scanners, if <code>currentRegion != null</code> and
* <code>done</code> is true.
* @param nbRows
* @param done Server-side says we're done scanning.
*/
protected boolean nextScanner(int nbRows, final boolean done)
throws IOException {
// Close the previous scanner if it's open
// 关闭之前打开的scanner
// 第一次调用时,callable因为没有初始化,所以肯定是空的,为null,此处会跳过
// 什么时候callable被赋值,而什么时候callable又被清空呢?
// 关闭上一个callable
if (this.callable != null) {
// 调用setClose()方法将callable中的currentScannerCallable的closed设置为true
// 将ScannerCallableWithReplicas类型的callable中ScannerCallable类型的成员变量中的closed设置为true
this.callable.setClose();
// 调用call()方法,发起一次请求,此时callable不为空,且其currentScannerCallable中closed为true
// 最终调用

&spm=1001.2101.3001.5002&articleId=50507256&d=1&t=3&u=03839872f5f943b1987b7e36ad481442)
1750

被折叠的 条评论
为什么被折叠?



