Hadoop HDFS 编程 API 入门系列之合并小文件到 HDFS

大数据和人工智能躺过的坑      2022-02-08     342

关键词:

 

  不多说,直接上代码。

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 代码版本1

  1 package zhouls.bigdata.myWholeHadoop.HDFS.hdfs7;
  2 
  3 import java.io.IOException;
  4 import java.net.URI;
  5 import java.net.URISyntaxException;
  6 import org.apache.hadoop.conf.Configuration;
  7 import org.apache.hadoop.fs.FSDataInputStream;
  8 import org.apache.hadoop.fs.FSDataOutputStream;
  9 import org.apache.hadoop.fs.FileStatus;
 10 import org.apache.hadoop.fs.FileSystem;
 11 import org.apache.hadoop.fs.FileUtil;
 12 import org.apache.hadoop.fs.Path;
 13 import org.apache.hadoop.fs.PathFilter;
 14 import org.apache.hadoop.io.IOUtils;
 15 /**
 16  * function 合并小文件至 HDFS 
 17  * 
 18  *
 19  */
 20 public class MergeSmallFilesToHDFS 
 21 {
 22     private static FileSystem fs = null;  //定义文件系统对象,是HDFS上的
 23     private static FileSystem local = null; //定义文件系统对象,是本地上的
 24     
 25     /**
 26      * @function main 
 27      * @param args
 28      * @throws IOException
 29      * @throws URISyntaxException
 30      */
 31     
 32     public static void main(String[] args) throws IOException,URISyntaxException{
 33     
 34         list();
 35     }
 36 
 37     /**
 38      * 
 39      * @throws IOException
 40      * @throws URISyntaxException
 41      */
 42     public static void list() throws IOException, URISyntaxException{
 43         // 读取hadoop配置文件
 44         Configuration conf = new Configuration();
 45         // 文件系统访问接口和创建FileSystem对象,在本地上运行模式
 46         URI uri = new URI("hdfs://HadoopMaster:9000");
 47         fs = FileSystem.get(uri, conf);
 48         // 获得本地文件系统
 49         local = FileSystem.getLocal(conf);
 50         // 过滤目录下的 svn 文件
 51         FileStatus[] dirstatus = local.globStatus(new Path("./data/mergeSmallFilesToHDFS/73/*"),new RegexExcludePathFilter("^.*svn$"));
 52 //    FileStatus[] dirstatus = local.globStatus(new Path("D://data/73/*"),new RegexExcludePathFilter("^.*svn$"));
 53         //获取D:\Data\tvdata目录下的所有文件路径
 54         Path[] dirs = FileUtil.stat2Paths(dirstatus);
 55         FSDataOutputStream out = null;
 56         FSDataInputStream in = null;
 57         for (Path dir : dirs) 
 58         {//比如拿2012-09-17为例
 59             //将文件夹名称2012-09-17的-去掉,直接,得到20120901文件夹名称
 60             String fileName = dir.getName().replace("-", "");//文件名称
 61             //只接受20120917日期目录下的.txt文件
 62             FileStatus[] localStatus = local.globStatus(new Path(dir+"/*"),new RegexAcceptPathFilter("^.*txt$"));
 63             // 获得20120917日期目录下的所有文件
 64             Path[] listedPaths = FileUtil.stat2Paths(localStatus);
 65             // 输出路径
 66             Path block = new Path("hdfs://HadoopMaster:9000/middle/tv/"+ fileName + ".txt");
 67             System.out.println("合并后的文件名称:"+fileName+".txt");
 68             // 打开输出流
 69             out = fs.create(block);    
 70             //循环20120917日期目录下的所有文件
 71             for (Path p : listedPaths){//这是星型for循环,即listedPaths的值传给Path p
 72                 in = local.open(p);// 打开输入流
 73                 IOUtils.copyBytes(in, out, 4096, false); // 复制数据
 74                 // 关闭输入流
 75                 in.close();
 76             }
 77             if (out != null){
 78                 // 关闭输出流
 79                 out.close();
 80             }
 81             //当循环完20120917日期目录下的所有文件之后,接着依次20120918,20120919,,,
 82         }
 83     }
 84 
 85     /**
 86      * 
 87      * @function 过滤 regex 格式的文件
 88      *
 89      */
 90     public static class RegexExcludePathFilter implements PathFilter{
 91         private final String regex;
 92 
 93         public RegexExcludePathFilter(String regex){
 94             this.regex = regex;
 95         }
 96 
 97         
 98         public boolean accept(Path path){
 99             // TODO Auto-generated method stub
100             boolean flag = path.toString().matches(regex);
101             return !flag;
102         }
103 
104     }
105 
106     /**
107      * 
108      * @function 接受 regex 格式的文件
109      *
110      */
111     public static class RegexAcceptPathFilter implements PathFilter{
112         private final String regex;
113 
114         public RegexAcceptPathFilter(String regex){
115             this.regex = regex;
116         }
117 
118     
119         public boolean accept(Path path){
120             // TODO Auto-generated method stub
121             boolean flag = path.toString().matches(regex);
122             return flag;
123         }
124 
125     }
126 }

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

代码版本2

  1 package com.dajiangtai.Hadoop.HDFS;
  2 
  3 import java.io.IOException;
  4 import java.net.URI;
  5 import java.net.URISyntaxException;
  6 import org.apache.hadoop.conf.Configuration;
  7 import org.apache.hadoop.fs.FSDataInputStream;
  8 import org.apache.hadoop.fs.FSDataOutputStream;
  9 import org.apache.hadoop.fs.FileStatus;
 10 import org.apache.hadoop.fs.FileSystem;
 11 import org.apache.hadoop.fs.FileUtil;
 12 import org.apache.hadoop.fs.Path;
 13 import org.apache.hadoop.fs.PathFilter;
 14 import org.apache.hadoop.hdfs.DistributedFileSystem;
 15 import org.apache.hadoop.io.IOUtils;
 16 /**
 17  * function 合并小文件至 HDFS     ,  文件与块大小(比如128M)来比,小的话,称为小文件。是一个相对概念!相对于数据块而言的!
 18  * @author 小讲
 19  *  我们利用通配符和PathFilter 对象,将本地多种格式的文件上传至 HDFS文件系统,并过滤掉 txt文本格式以外的文件。
 20  */
 21 public class MergeSmallFilesToHDFS {
 22     private static FileSystem fs = null;
 23     private static FileSystem local = null;
 24     /**
 25      * @function main 
 26      * @param args
 27      * @throws IOException
 28      * @throws URISyntaxException
 29      */
 30     public static void main(String[] args) throws IOException,
 31             URISyntaxException {
 32         list();
 33     }
 34 
 35     /**
 36      * 
 37      * @throws IOException
 38      * @throws URISyntaxException
 39      */
 40     public static void list() throws IOException, URISyntaxException {
 41         // 读取hadoop文件系统的配置
 42         Configuration conf = new Configuration();
 43 //        conf=Configuration
 44 //        conf是Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml
 45         
 46         //文件系统访问接口
 47         URI uri = new URI("hdfs://djt002:9000");
 48 //        uri=URI
 49 //        uri是hdfs://djt002:9000
 50         
 51 //        URL、URI与Path三者的区别
 52 //        Hadoop文件系统中通过Hadoop Path对象来代表一个文件    
 53 //        URL(相当于绝对路径)    ->   (文件) ->    URI(相当于相对路径,即代表URL前面的那一部分)
 54 //        URI:如hdfs://dajiangtai:9000
 55 //        如,URL.openStream
 56         
 57         
 58         
 59         //获得FileSystem实例,即HDFS
 60         fs = FileSystem.get(uri, conf);
 61 //        fs=DistributedFileSystem
 62 //        fs是DFS[DFSClient[clientName=DFSClient_NONMAPREDUCE_1814566850_1, ugi=Administrator (auth:SIMPLE)]]
 63         
 64         //获得FileSystem实例,即Local
 65         local = FileSystem.getLocal(conf);
 66 //        local=LocalFileSystem
 67 //        local是org.apache.hadoop.fs.LocalFileSystem@3ce1b8c5
 68 //            为什么要获取到Local呢,因为,我们要把本地D盘下data/73目录下的文件要合并后,上传到HDFS里,所以,我们需先获取到Local,再来做合并工作啦!
 69         
 70         
 71 //        18、列出文件或目录内容(主要是存放文件或目录的元数据,即大小,权限,副本,,,)
 72 //        public FileStatus[] listStatus(Path f) throws IOException
 73 //        public FileStatus[] listStatus(Path f,PathFilter filter) throws IOException
 74 //                PathFilter是路径过滤器
 75 //        public FileStatus[] listStatus(Path[] files) throws IOException
 76 //        public FileStatus[] listStatus(Path[] files,PathFilter filter)
 77 //                传送Path数组和路径过滤器
 78 //                
 79 //                
 80 //        19、FileUtil中的stat2Paths(),将一个FileStatus元数据对象数组转换为一个Path对象数组
 81 //
 82 //        20、(1)使用通配符来匹配多个目录下的多个文件,也是列出文件或目录内容(主要是存放文件或目录的元数据,即大小,权限,副本,,,)
 83 //        public FileStatus[] globStatus(Path pathPattern) throws IOException
 84 //        public FileStatus[] globStatus(Path pathPattern,PathFilter filter) throws IOException
 85 //                    
 86 //          (2)PathFilter对象
 87 //        public interface PathFilter{
 88 //            boolean accpet(Path path);
 89 //        }        
 90         
 91         
 92         
 93         //过滤目录下的 svn 文件,globStatus从第一个参数通配符合到文件,剔除满足第二个参数到结果,因为PathFilter中accept是return!  
 94         FileStatus[] dirstatus = local.globStatus(new Path("D://data/73/*"),new RegexExcludePathFilter("^.*svn$"));//一般这是隐藏文件,所以得排除
 95         //dirstatus=FileStatus[7]
 96 //        dirstatus是[DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17; isDirectory=true; modification_time=1427791478002; access_time=0; owner=; group=; permission=rwxrwxrwx; isSymlink=false}
 97 //        , DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-18; isDirectory=true; modification_time=1427791505373; access_time=0; owner=; group=; permission=rwxrwxrwx; isSymlink=false}
 98 //        , DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-19; isDirectory=true; modification_time=1427791532277; access_time=0; owner=; group=; permission=rwxrwxrwx; isSymlink=false}
 99 //        , DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-20; isDirectory=true; modification_time=1427791553035; access_time=0; owner=; group=; permission=rwxrwxrwx; isSymlink=false}
100 //        , DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-21; isDirectory=true; modification_time=1427791577709; access_time=0; owner=; group=; permission=rwxrwxrwx; isSymlink=false}
101 //        , DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-22; isDirectory=true; modification_time=1427791602770; access_time=0; owner=; group=; permission=rwxrwxrwx; isSymlink=false}
102 //        , DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-23; isDirectory=true; modification_time=1427791647177; access_time=0; owner=; group=; permission=rwxrwxrwx; isSymlink=false}]
103         
104                         
105                         //        ^表示匹配我们字符串开始的位置               *代表0到多个字符                        $代表字符串结束的位置
106 //        RegexExcludePathFilter来只排除我们不需要的,即svn格式
107 //        RegexExcludePathFilter这个方法我们自己写
108         
109 //        但是我们,最终是要处理文件里的东西,最终是要转成Path类型,因为Path对象f,它对应着一个文件。
110         
111         //获取73目录下的所有文件路径,注意FIleUtil中stat2Paths()的使用,它将一个FileStatus对象数组转换为Path对象数组。
112         Path[] dirs = FileUtil.stat2Paths(dirstatus);//dirstatus是FileStatus数组类型
113 //        dirs=Path[7]
114 //        dirs是    [file:/D:/data/73/2012-09-17
115 //                 , file:/D:/data/73/2012-09-18
116 //                 , file:/D:/data/73/2012-09-19
117 //                 , file:/D:/data/73/2012-09-20
118 //                 , file:/D:/data/73/2012-09-21
119 //                 , file:/D:/data/73/2012-09-22
120 //                 , file:/D:/data/73/2012-09-23]        
121                 
122         
123         FSDataOutputStream out = null;//输出流
124 //        out=HdfsDaDataOutputStream
125 //        out是org.apache.hadoop.hdfs.client.HdfsDataOutputStream@2b11624e
126         
127         FSDataInputStream in = null;//输入流
128 //        in=ChecksumFileSystem&FSDataBoundedInputStream
129 //        in是org.apache.hadoop.fs.ChecksumFileSystem$FSDataBoundedInputStream@526d542f
130         
131 //        很多人搞不清输入流和输出流,!!!!
132 //        其实啊,输入流、输出流都是针对内存的
133 //        往内存里写,是输入流。
134 //        内存往文件里写,是输出Luis。
135 //        
136 //        比如一个文件A复制到另一文件B,那么,先写到内存里,再写到文件B。
137 //           =>   则文件A写到内存里,叫输入流。
138 //           =>    则内存里写到文件B,叫输出流    
139         
140         
141         for (Path dir : dirs) {//for星型循环,即将dirs是Path对象数组,一一传给Path dir
142 //            dirs=Path[7]
143 //            dirs是[file:/D:/data/73/2012-09-17
144 //                  , file:/D:/data/73/2012-09-18
145 //                  , file:/D:/data/73/2012-09-19
146 //                  , file:/D:/data/73/2012-09-20
147 //                  , file:/D:/data/73/2012-09-21
148 //                  , file:/D:/data/73/2012-09-22
149 //                  , file:/D:/data/73/2012-09-23]    
150             
151 //        dir= Path    
152 //        先传,dir是file:/D:/data/73/2012-09-17
153 //        再传,file:/D:/data/73/2012-09-18           
154 //        再传,file:/D:/data/73/2012-09-19     
155 //        再传,file:/D:/data/73/2012-09-20       
156 //        再传,file:/D:/data/73/2012-09-21       
157 //        再传,file:/D:/data/73/2012-09-22       
158 //        再传,file:/D:/data/73/2012-09-23       
159             
160             String fileName = dir.getName().replace("-", "");//文件名称
161 //                        先获取到如2012-09-17,然后经过replace("-", ""),得到20120917
162 //                                                                再获取,20120918
163 //                                                                再获取,20120919
164 //                                                                再获取,20120920
165 //                                                                再获取,20120921
166 //                                                                再获取,20120922
167 //                                                                再获取,20120923            
168             
169             //只接受日期目录下的.txt文件,^匹配输入字符串的开始位置,$匹配输入字符串的结束位置,*匹配0个或多个字符。
170             FileStatus[] localStatus = local.globStatus(new Path(dir+"/*"),new RegexAcceptPathFilter("^.*txt$"));
171 //            先获取到,localStatus=FileStatus[23]
172 //                   localStatus是[DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17/ars10767@20120917000000.txt; isDirectory=false; length=1111961; replication=1; blocksize=33554432; modification_time=1398669214000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17/ars10767@20120917001500.txt; isDirectory=false; length=782533; replication=1; blocksize=33554432; modification_time=1398669214000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17/ars10767@20120917003000.txt; isDirectory=false; length=593507; replication=1; blocksize=33554432; modification_time=1398669214000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17/ars10767@20120917004500.txt; isDirectory=false; length=839019; replication=1; blocksize=33554432; modification_time=1398669214000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17/ars10767@20120917010000.txt; isDirectory=false; length=866393; replication=1; blocksize=33554432; modification_time=1398669214000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17/ars10767@20120917011500.txt; isDirectory=false; length=678491; replication=1; blocksize=33554432; modification_time=1398669214000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17/ars10767@20120917013000.txt; isDirectory=false; length=593292; replication=1; blocksize=33554432; modification_time=1398669214000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17/ars10767@20120917014500.txt; isDirectory=false; 
length=688620; replication=1; blocksize=33554432; modification_time=1398669214000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17/ars10767@20120917020000.txt; isDirectory=false; length=674864; replication=1; blocksize=33554432; modification_time=1398669214000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17/ars10767@20120917021500.txt; isDirectory=false; length=635052; replication=1; blocksize=33554432; modification_time=1398669214000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17/ars10767@20120917023000.txt; isDirectory=false; length=547324; replication=1; blocksize=33554432; modification_time=1398669214000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17/ars10767@20120917024500.txt; isDirectory=false; length=598814; replication=1; blocksize=33554432; modification_time=1398669214000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17/ars10767@20120917030000.txt; isDirectory=false; length=542600; replication=1; blocksize=33554432; modification_time=1398669214000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17/ars10767@20120917031500.txt; isDirectory=false; length=535446; replication=1; blocksize=33554432; modification_time=1398669214000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17/ars10767@20120917033000.txt; isDirectory=false; length=592780; replication=1; blocksize=33554432; modification_time=1398669214000; access_time=0; owner=; group=; permission=rw-rw-rw-; 
isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17/ars10767@20120917034500.txt; isDirectory=false; length=619410; replication=1; blocksize=33554432; modification_time=1398669216000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17/ars10767@20120917040000.txt; isDirectory=false; length=590326; replication=1; blocksize=33554432; modification_time=1398669214000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17/ars10767@20120917041500.txt; isDirectory=false; length=428487; replication=1; blocksize=33554432; modification_time=1398669214000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17/ars10767@20120917043000.txt; isDirectory=false; length=598048; replication=1; blocksize=33554432; modification_time=1398669214000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17/ars10767@20120917044500.txt; isDirectory=false; length=598792; replication=1; blocksize=33554432; modification_time=1398669216000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17/ars10767@20120917050000.txt; isDirectory=false; length=575613; replication=1; blocksize=33554432; modification_time=1398669214000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17/ars10767@20120917051500.txt; isDirectory=false; length=619080; replication=1; blocksize=33554432; modification_time=1398669214000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-17/ars10767@20120917053000.txt; isDirectory=false; length=587763; 
replication=1; blocksize=33554432; modification_time=1398669214000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}]
173 //                   再获取到,localStatus=FileStatus[23]
174 //            localStatus是[DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-18/ars10767@20120918131500.txt; isDirectory=false; length=1722797; replication=1; blocksize=33554432; modification_time=1398669244000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-18/ars10767@20120918133000.txt; isDirectory=false; length=1922955; replication=1; blocksize=33554432; modification_time=1398669244000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-18/ars10767@20120918134500.txt; isDirectory=false; length=1388036; replication=1; blocksize=33554432; modification_time=1398669244000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-18/ars10767@20120918140000.txt; isDirectory=false; length=1888871; replication=1; blocksize=33554432; modification_time=1398669244000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-18/ars10767@20120918141500.txt; isDirectory=false; length=1685719; replication=1; blocksize=33554432; modification_time=1398669244000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-18/ars10767@20120918143000.txt; isDirectory=false; length=1541381; replication=1; blocksize=33554432; modification_time=1398669244000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-18/ars10767@20120918144500.txt; isDirectory=false; length=1723638; replication=1; blocksize=33554432; modification_time=1398669244000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-18/ars10767@20120918150000.txt; isDirectory=false; 
length=1629322; replication=1; blocksize=33554432; modification_time=1398669244000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-18/ars10767@20120918151500.txt; isDirectory=false; length=1658684; replication=1; blocksize=33554432; modification_time=1398669244000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-18/ars10767@20120918153000.txt; isDirectory=false; length=1548216; replication=1; blocksize=33554432; modification_time=1398669244000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-18/ars10767@20120918154500.txt; isDirectory=false; length=1510965; replication=1; blocksize=33554432; modification_time=1398669244000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-18/ars10767@20120918160000.txt; isDirectory=false; length=1559078; replication=1; blocksize=33554432; modification_time=1398669244000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-18/ars10767@20120918161500.txt; isDirectory=false; length=1752005; replication=1; blocksize=33554432; modification_time=1398669244000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-18/ars10767@20120918163000.txt; isDirectory=false; length=1901994; replication=1; blocksize=33554432; modification_time=1398669244000; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false}, DeprecatedRawLocalFileStatus{path=file:/D:/data/73/2012-09-18/ars10767@20120918164500.txt; isDirectory=false; length=2234304; replication=1; blocksize=33554432; modification_time=1398669244000; a

hadoophdfs编程api入门系列之hdfsutil版本1

   不多说,直接上代码。                     代码packagezhouls.bigdata.myWholeHadoop.HDFS.hdfs2;importj 查看详情

hadoopmapreduce编程api入门系列之网页流量版本1(二十二)

    不多说,直接上代码。  对流量原始日志进行流量统计,将不同省份的用户统计结果输出到不同文件。                 代码packagezhouls.bigdata.myMapReduce.fl 查看详情

hadoopmapreduce编程api入门系列之网页流量版本1(二十一)

   不多说,直接上代码。  对流量原始日志进行流量统计,将不同省份的用户统计结果输出到不同文件。                       代码packag 查看详情

hadoopmapreduce编程api入门系列之挖掘气象数据版本2

     下面,是版本1。HadoopMapReduce编程API入门系列之挖掘气象数据版本1(一)       这篇博文,包括了,实际生产开发非常重要的,单元测试和调试代码。这里不多赘述,直接送上代码。 MRUn... 查看详情

hadoopmapreduce编程api入门系列之倒排索引(二十四)

     不多说,直接上代码。2016-12-1221:54:04,509INFO[org.apache.hadoop.metrics.jvm.JvmMetrics]-InitializingJVMMetricswithprocessName=JobTracker,sessionId=2016-12-1221:54:05,166WARN[org.apache.hado 查看详情

hadoopmapreduce编程api入门系列之fof(fundoffund)(二十三)

    不多说,直接上代码。                     代码packagezhouls.bigdata.myMapReduce.friend;importo 查看详情

hadoopmapreduce编程api入门系列之统计学生成绩版本1(十七)

    不多说,直接上代码。                        代码packagezhouls.bigdata.myMapRed 查看详情

hbase编程api入门之htablepool

   HTable是一个比较重的对此,比如加载配置文件,连接ZK,查询meta表等等,高并发的时候影响系统的性能,因此引入了“池”的概念。   引入“HBase里的连接池”的目的是,            ... 查看详情

hbase编程api入门之delete

   前面的基础,是HBase编程API入门之putHBase编程API入门之get hbase(main):001:0>scan‘test_table2‘ROWCOLUMN+CELLrow_04column=f1:name,timestamp=1478117286377,value=Andy3row_04column=f2:name,timestamp=14781172 查看详情

hbase编程api入门之get

    前面是基础。HBase编程API入门之put packagezhouls.bigdata.HbaseProject.Test1;importjavax.xml.transform.Result;importorg.apache.hadoop.conf.Configuration;importorg.apache.hadoop.hbase.HBaseConfig 查看详情

scala入门系列:函数式编程之集合操作

1.Scala的集合体系结构Scala中的集合体系主要包括(结构跟Java相似):Iterable(所有集合trait的根trait)Seq(Range、ArrayBuffer、List等)Set(HashSet、LinkedHashSet、SortedSet等)Map(HashMap、SortedMap、LinkedHashMap等)Scala中的集合分为可变和不... 查看详情

跟着小蚂蚁学编程

打算开一系列编程教程课程初步计划的板块如下:C++零基础基础Qt入门Godot零基础入门算法和数据结构Blender入门小软件开发系列小游戏开发系列模型制作系列不知道有没有想学的小伙伴呢? 查看详情

跟着小蚂蚁学编程

打算开一系列编程教程课程初步计划的板块如下:C++零基础基础Qt入门Godot零基础入门算法和数据结构Blender入门小软件开发系列小游戏开发系列模型制作系列不知道有没有想学的小伙伴呢? 查看详情

hbase编程api入门之create(管理端而言)

...;   大家,若是看过我前期的这篇博客的话,则HBase编程API入门之put(客户端而言)  就知道,在这篇博文里,我是在HBaseShell里创建HBase表的。   这里,我带领大家,学习更高级的,因为,在开发中,尽量不能去服... 查看详情

android系统编程入门系列之应用环境及开发环境介绍

    作为移动端操作系统,目前最新的Android11.0已经发展的比较完善了,现在也到了系统的整理一番的时间,接下来的系列文章将以Android开发者为中心,争取用归纳总结的态度对初级入门者所应掌握的基础知识聊以标记。应... 查看详情

pandas系列之入门篇

Pandas系列之入门篇简介pandas是python用来数据清洗、分析的包,可以使用类sql的语法方便的进行数据关联、查询,属于内存计算范畴,效率远远高于硬盘计算的数据库存储。另外pandas还提供了大数据存储的API——HDFStore,来对接HDF5... 查看详情

系列文章--opencv入门教程

 《OpenCV3编程入门》内容简介&勘误&配套源代码下载【OpenCV入门教程之十八】OpenCV仿射变换&SURF特征点描述合辑【OpenCV入门教程之十七】OpenCV重映射&SURF特征点检测合辑【OpenCV入门教程之十六】OpenCV角点检测之Harris... 查看详情

hbase编程api入门之delete.deletecolumn和delete.deletecolumns区别

   delete.deleteColumn和delete.deleteColumns区别是    deleteColumn是删除某一个列簇里的最新时间戳版本。    delete.deleteColumns是删除某个列簇里的所有时间戳版本。    hbase(main):020:0>desc‘test_table‘Tabletest_t... 查看详情