HBase Cluster Maintenance Notes
1. Cluster fault symptoms and fixes

Symptom: "hbase shell" cannot list the tables, and rows are missing from .META.
Fix: check the regionserver logs across the cluster; a restart of HBase usually resolves it.

Symptom: an HBase table shows up incompletely.
Fix: run "hbase hbck" for the details; the likely error is a "no empty start key" complaint, i.e. "First region should start with an empty key" (HBASE-5599). Create a new region whose startkey is empty (see the CreateRegion class in section 2).

Symptom: "hbase hbck" reports holes and "Found inconsistency" (a quick hbck check is sketched right after this table).
Fix:
1. Move the broken and inconsistent region directories of the table into a scratch folder on HDFS.
2. Delete the rows for those regions from the .META. table.
3. Merge each pair of regions where the previous region's endkey does not match the next region's startkey; stop HBase or disable the table while merging.

Symptom: during a merge, HBase reports that fs.default.name in hbase-site.xml is misconfigured.
Fix: set fs.default.name to the same value as in core-site.xml.
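Running hbck is the quickest way to surface the symptoms above before attempting any repair. A minimal sketch, using only the tool's stock invocations:

# report inconsistencies between .META. and the region directories on HDFS
hbase hbck

# print the full per-region report, useful for spotting bad startkey/endkey pairs
hbase hbck -details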
2. Detailed procedures:

- Script that gathers the region directories that "hbase hbck" flags as holes or "Found inconsistency", moves them into one place, and deletes their rows from the .META. table, hbase_mvdelete_region.sh:
#!/bin/bash
# Walk the scratch folder holding the bad regions (here: /tmp),
# delete each region's row from .META., then move the directory to /done.

Files=`hadoop fs -ls /tmp | awk '{print $8}'`

for file in $Files
do
    # get the region's row key (the NAME field) from its .regioninfo file
    rowkey=`hadoop fs -cat $file/.regioninfo | awk -F "REGION => {NAME => '" '{print $2}' | awk -F "', STARTKEY" '{print $1}' | awk 'END{print $0}'`

    # 1: delete the region's row from .META.
    echo "deleteall '.META.', '$rowkey'" | hbase shell

    # 2: move the region directory out of the scratch folder
    hadoop fs -mv $file /done
done
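A usage sketch, assuming the bad region directories were already moved under /tmp (the scratch folder hard-coded in the script) and that the /done target exists:

hadoop fs -mkdir /done    # create the target folder once
sh hbase_mvdelete_region.sh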

- Script that finds the startkey and endkey of each region of a table, hbase_region.sh, in preparation for the merge:
#!/bin/bash
# Emit one tab-separated line per region (startkey, endkey, region name,
# table name, flag) for the bad regions ($1) and the live table ($2),
# then sort by table and startkey to prepare the merge list.
if [ $# -ne 2 ]
then
    echo "usage: $0 /done /hbase/ACC_ZDR"
    exit 1
fi

# $1: the folder holding the bad regions, e.g. /done; flagged 0
badfiles=`hadoop fs -ls $1 | awk '{print $8}'`

for file in $badfiles
do
    tablename=`hadoop fs -cat $file/.regioninfo | awk -F "REGION => {NAME => '" '{print $2}' | awk -F ',' '{print $1}' | awk 'END{print $0}'`
    regionname=`hadoop fs -cat $file/.regioninfo | awk -F " ENCODED => " '{print $2}' | awk -F ", TABLE =>" '{print $1}' | awk 'END{print $0}'`
    startkey=`hadoop fs -cat $file/.regioninfo | awk -F "STARTKEY => '" '{print $2}' | awk -F "', ENDKEY" '{print $1}' | awk 'END{print $0}'`
    endkey=`hadoop fs -cat $file/.regioninfo | awk -F "ENDKEY => '" '{print $2}' | awk -F "', ENCODED" '{print $1}' | awk 'END{print $0}'`

    # fields are separated by literal tabs
    echo -e "$startkey\t$endkey\t$regionname\t$tablename\t0" >> /home/hadoop/hbase_merge.txt
done

# $2: the table directory in HBase, e.g. /hbase/ACC_ZDR; flagged 1
goodfiles=`hadoop fs -ls $2 | awk '{print $8}'`

for file in $goodfiles
do
    tablename=`hadoop fs -cat $file/.regioninfo | awk -F "REGION => {NAME => '" '{print $2}' | awk -F ',' '{print $1}' | awk 'END{print $0}'`
    regionname=`hadoop fs -cat $file/.regioninfo | awk -F " ENCODED => " '{print $2}' | awk -F ", TABLE =>" '{print $1}' | awk 'END{print $0}'`
    startkey=`hadoop fs -cat $file/.regioninfo | awk -F "STARTKEY => '" '{print $2}' | awk -F "', ENDKEY" '{print $1}' | awk 'END{print $0}'`
    endkey=`hadoop fs -cat $file/.regioninfo | awk -F "ENDKEY => '" '{print $2}' | awk -F "', ENCODED" '{print $1}' | awk 'END{print $0}'`

    echo -e "$startkey\t$endkey\t$regionname\t$tablename\t1" >> /home/hadoop/hbase_merge.txt
done

sort -t $'\t' -k 4,4 -k 1,1 /home/hadoop/hbase_merge.txt > /home/hadoop/result_hbase_merge.txt
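A usage sketch, with the two arguments taken from the script's usage string:

sh hbase_region.sh /done /hbase/ACC_ZDR
# result_hbase_merge.txt now lists the regions grouped by table and sorted by startkey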

- Merge command: hbase org.apache.hadoop.hbase.util.Merge tablename region1 region2
For example:
hbase org.apache.hadoop.hbase.util.Merge CHAT_INFO 'CHAT_INFO,1328275311100000000229959,1329200178825.1aebf5e813a21d329911e84fc2bc7229.' 'CHAT_INFO,1326276556100000000059123,1328873143348.2c544bbaa549746ebd43aa2e6288c584.'
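Picking the pairs to merge out of result_hbase_merge.txt by hand is error-prone, so here is a minimal helper sketch, assuming the tab-separated layout written above: it reports adjacent regions of the same table whose keys do not line up. Note that the Merge tool itself takes the full region names (as in the example) and must be run with HBase stopped:

#!/bin/bash
# report adjacent regions whose endkey/startkey do not line up;
# each reported pair is a candidate for org.apache.hadoop.hbase.util.Merge
prev_end=""; prev_name=""; prev_table=""
while IFS=$'\t' read -r start end name table flag
do
    if [ "$table" = "$prev_table" ] && [ "$start" != "$prev_end" ]
    then
        echo "merge candidate in $table: $prev_name (ENDKEY '$prev_end') <-> $name (STARTKEY '$start')"
    fi
    prev_end=$end; prev_name=$name; prev_table=$table
done < /home/hadoop/result_hbase_merge.txt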

- The fs.default.name parameter in hbase-site.xml must match core-site.xml; it is configured as follows:

<property>
  <name>fs.default.name</name>
  <value>hdfs://master0:9000</value>
</property>
- Creating a new region:
List all configured ZooKeeper nodes, separated by commas: conf.set("hbase.zookeeper.quorum", args[0]); // e.g. master0,slave1,slave2
package com.run.hbase.dataImport;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Writables;

public class CreateRegion {

    public static HTable getMetaTable(Configuration conf) throws IOException {
        return new HTable(conf, HConstants.META_TABLE_NAME);
    }

    public static HBaseAdmin getHBaseAdmin(Configuration conf)
            throws MasterNotRunningException, ZooKeeperConnectionException {
        return new HBaseAdmin(conf);
    }

    public static void createEmptyRegion(Configuration conf, HTableDescriptor tblDes,
            byte[] startKey, byte[] endKey) throws IllegalArgumentException, IOException,
            InterruptedException {
        // build the region descriptor and write its row into .META.
        HRegionInfo hri = new HRegionInfo(tblDes, startKey, endKey);
        Put put = new Put(hri.getRegionName());
        put.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
                Writables.getBytes(hri));

        HTable metatable = getMetaTable(conf);
        metatable.put(put);

        HBaseAdmin admin = getHBaseAdmin(conf);
        // flush .META. data to hdfs, then assign the new region
        admin.flush(metatable.getTableName());
        admin.assign(hri.getRegionName(), true);
    }

    public static void main(String[] args) {
        System.out.println("masterip  2181  startkey  endkey  tablename  columnfamily");

        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", args[0]); // master0,slave1,slave2
        conf.set("hbase.zookeeper.property.clientPort", args[1]);
        conf.set("hbase.master", args[0] + ":60000");

        HTableDescriptor tblDes = new HTableDescriptor(args[4]);
        HColumnDescriptor cf = new HColumnDescriptor(args[5]);
        tblDes.addFamily(cf);

        byte[] startKeys = Bytes.toBytes(args[2]);
        byte[] endKeys = Bytes.toBytes(args[3]);

        try {
            createEmptyRegion(conf, tblDes, startKeys, endKeys);
        } catch (IllegalArgumentException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
}
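A usage sketch for the class above, assuming it is packaged into a jar (createregion.jar is a hypothetical name) and run with the HBase classpath. The argument order follows the usage line printed by main(), and an empty startkey ('') is exactly what fixes the "First region should start with an empty key" error:

# 'FIRST_REGION_STARTKEY' is a placeholder: use the startkey of the
# table's current first region as the new region's endkey
java -cp `hbase classpath`:createregion.jar com.run.hbase.dataImport.CreateRegion \
    master0 2181 '' 'FIRST_REGION_STARTKEY' CHAT_INFO cf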