最近在做web网盘的系统,网盘最基本的功能便是文件上传,但是文件上传当遇到大文件的时候,在web端按传统方式上传简直是灾难,所以大文件上传可以采用分片上传的办法。其主要思路是:1.大文件上传时进行分片;2.分片上传;3.对分片文件进行合并。思路比较清晰简单,但一些问题在于:1.大文件如何进行分片?2.分片如何进行记录和存储?3.如何校验每个分片文件的唯一性和顺序性?4.如何合并文件?对于大文件如何分片,这个主要是在前端进行解决,在这里推荐大家用百度的WebUploader来实现前端所需。对于对分片之后的文件进行存储的问题,我采用了临时文件存储的办法,临时文件存储着每个分块对应字节位的状态。对于分片文件的区分,这里可以采用MD5码的方式(不清楚MD5码的可以先查一下),MD5码简单理解就像每个文件的身份证一样,每个不同的文件都有自己唯一的MD5码。对于合并文件的时候,前端在对文件分片之后,在请求服务端合并的时候,请求中要带上分片序号和大小,服务器按照请求数据中给的分片序号和每片分块大小算出开始位置,与读取到的文件片段数据,写入文件即可。这里合并后的文件会存储俩个路径,一个是当前网盘目录下的路径,一个是真实的永久路径(目的是为了实现秒传的功能)。前端分片的代码就不贴了,主要用的百度的WebUploader。这里主要贴一些服务端的主要的代码
文件上传
/**
 * Receives one chunk of an upload and hands it to the file service for storage.
 *
 * @param file             the chunk payload
 * @param wholeMd5         MD5 of the complete file
 * @param name             file name
 * @param type             file type
 * @param lastModifiedDate date sent by the client
 * @param size             size of the complete file
 * @param start            start offset of this chunk inside the complete file
 * @param end              end offset of this chunk (the chunk length is {@code end - start})
 * @param chunks           total number of chunks, defaults to 1
 * @param chunk            zero-based index of this chunk, defaults to 0
 * @return 200 with body {@code 1} when the service call completes, 500 with an error
 *         description when it throws
 */
@ApiOperation(value = "文件上传", hidden = true)
@IgnoreUserToken
@ApiResponses({@ApiResponse(code = 500, response = RestError.class, message = "错误")})
@PostMapping(value = "upload")
public ResponseEntity<Integer> fileUpload(@ApiParam(name = "文件") @RequestPart MultipartFile file,
                                          @ApiParam(name = "md5") @RequestParam String wholeMd5,
                                          @ApiParam(name = "名称") @RequestParam String name,
                                          @ApiParam(name = "类型") @RequestParam String type,
                                          @ApiParam(name = "日期") @RequestParam Date lastModifiedDate,
                                          @ApiParam(name = "大小") @RequestParam long size,
                                          @ApiParam(name = "开始位置") @RequestParam long start,
                                          @ApiParam(name = "结束位置") @RequestParam long end,
                                          @ApiParam(name = "总分块数") @RequestParam(name = "chunks", defaultValue = "1") int chunks,
                                          @ApiParam(name = "第几个分块,从0开始") @RequestParam(name = "chunk", defaultValue = "0") int chunk) {
    // This method opens the multipart stream, so it is also responsible for closing it.
    try (java.io.InputStream chunkStream = file.getInputStream()) {
        log.info("文件开始上传");
        // The boolean result is deliberately not mapped to the response: the service returns
        // false when the chunk is already on disk, which is not a failure for the client.
        this.fileServiceImpl.fileUpload(chunkStream, wholeMd5, name, type, lastModifiedDate, size, chunks, chunk, start, end);
        return ResponseEntity.ok(1);
    } catch (Exception e) {
        log.error(e.getMessage(), e);
        return new ResponseEntity(RestError.IO_ERROR.setReason(e.getMessage()).toString(), HttpStatus.INTERNAL_SERVER_ERROR);
    }
}

/**
 * Stores one chunk as {@code <fileDir>/<TEMP_DIR>/<wholeMd5>/<chunk><FILE_SEPARATOR><chunks><FILE_EXT>}.
 * The chunk is only written when no file of the expected length ({@code end - start}) exists yet.
 *
 * @param fileIS           chunk content; read but not closed by this method
 * @param wholeMd5         MD5 of the complete file, used as the chunk directory name
 * @param name             file name (not used here)
 * @param type             file type (not used here)
 * @param lastModifiedDate date sent by the client (not used here)
 * @param size             size of the complete file (not used here)
 * @param chunks           total number of chunks
 * @param chunk            zero-based index of this chunk
 * @param start            start offset of the chunk inside the complete file
 * @param end              end offset of the chunk
 * @return {@code true} when a chunk file was written and the number of bytes copied equals
 *         {@code end - start}; {@code false} otherwise, including when the chunk already existed
 * @throws Exception any failure while creating or writing the chunk file, or an
 *                   {@link IllegalArgumentException} when {@code wholeMd5} is not a plain name
 */
@Override
public boolean fileUpload(InputStream fileIS, String wholeMd5, String name, String type, Date lastModifiedDate,
                          long size, int chunks, int chunk, long start, long end) throws Exception {
    boolean result = false;
    try {
        // wholeMd5 comes straight from the request and becomes a directory name.
        if (!isSafePathSegment(wholeMd5)) {
            throw new IllegalArgumentException("Illegal md5: " + wholeMd5);
        }
        File tempDirFile = new File(fileDir, TEMP_DIR);
        if (!tempDirFile.exists()) {
            tempDirFile.mkdirs();
        }
        // Directory holding all chunks of this file
        File wholeMd5FileDirectory = new File(tempDirFile.getAbsolutePath(), wholeMd5);
        if (!wholeMd5FileDirectory.exists()) {
            wholeMd5FileDirectory.mkdirs();
        }
        // The chunk file itself
        File chunkFile = new File(wholeMd5FileDirectory.getAbsolutePath(), chunk + FILE_SEPARATOR + chunks + FILE_EXT);
        long chunkSize = end - start;
        if (!chunkFile.exists() || chunkFile.length() != chunkSize) {
            // Create (or overwrite) the chunk file
            long startTime = System.currentTimeMillis();
            log.info("创建建分片{} – {} ", start, end);
            int length;
            // StreamUtils.copy leaves both streams open, so the output must be closed here.
            try (FileOutputStream chunkOut = new FileOutputStream(chunkFile)) {
                length = StreamUtils.copy(fileIS, chunkOut);
            }
            long endTime = System.currentTimeMillis();
            log.info("分片上传耗时{}毫秒", (endTime - startTime));
            if (length == chunkSize) {
                result = true;
            }
        }
    } catch (Exception e) {
        // Log the exception itself (with stack trace) rather than its possibly-null cause.
        log.error("文件上传出错{}", e.getMessage(), e);
        throw e;
    }
    return result;
}

/**
 * Tells whether {@code value} can be used as a single file or directory name: it must be
 * non-empty, must not be {@code "."} or {@code ".."} and must not contain a path separator.
 * Intended to live next to the service methods that build paths from request data.
 */
private static boolean isSafePathSegment(String value) {
    return value != null
            && !value.isEmpty()
            && !".".equals(value)
            && !"..".equals(value)
            && value.indexOf('/') < 0
            && value.indexOf('\\') < 0;
}
检查文件的MD5
/**
 * Checks whether a complete file with the given MD5 and size is already stored on the server.
 *
 * @param md5            MD5 of the file
 * @param fileSize       size of the file
 * @param md5CheckLength length of the file content used to compute the MD5
 * @return 200 with body {@code 1} when the file exists, {@code 0} when it does not;
 *         500 with an error description when the check throws
 */
@ApiOperation(value = "检查文件的md5")
@GetMapping(value = "checkFileMd5/{md5}/{fileSize}/{md5CheckLength}")
@ApiResponses({@ApiResponse(code = 500, response = RestError.class, message = "错误")})
public ResponseEntity<Integer> checkFileMd5(@ApiParam("文件md5码") @PathVariable String md5,
                                            @ApiParam("文件大小") @PathVariable long fileSize,
                                            @ApiParam("文件用来检查md5的长度") @PathVariable long md5CheckLength) {
    try {
        log.info("开始检验md5[{}],是否存在", md5);
        return ResponseEntity.ok(this.fileServiceImpl.checkFileMd5(md5, fileSize, md5CheckLength) ? 1 : 0);
    } catch (Exception e) {
        log.error(e.getMessage(), e);
        return new ResponseEntity(RestError.DATABASE_ERROR.setReason(e.getMessage()).toString(), HttpStatus.INTERNAL_SERVER_ERROR);
    }
}

/**
 * Looks up the upload record by MD5 and size and verifies that the stored file still matches.
 *
 * @param md5            MD5 of the file
 * @param fileSize       size of the file
 * @param md5CheckLength length passed to {@code FileUtils.md5} when re-computing the MD5
 * @return {@code true} when a record exists, the file it points to exists with length
 *         {@code fileSize}, and its re-computed MD5 equals {@code md5}
 */
@Override
public boolean checkFileMd5(String md5, long fileSize, long md5CheckLength) {
    Optional<UploadFileInfo> uploadFileInfo = this.uploadFileDao.findByMd5AndSize(md5, fileSize);
    boolean isExist = false;
    if (uploadFileInfo.isPresent()) {
        File wholeFile = new File(this.fileDir, uploadFileInfo.get().getDfsPath());
        isExist = wholeFile.exists()
                && wholeFile.length() == fileSize
                && md5.equals(FileUtils.md5(wholeFile, 0, md5CheckLength));
    }
    log.info("{}的文件{}存在", md5, isExist ? "" : "不");
    return isExist;
}
检查分片是否存在
/**
 * Checks whether a given chunk has already been uploaded.
 *
 * @param md5            MD5 of the complete file
 * @param blockMd5       MD5 of the chunk
 * @param md5CheckLength length of the chunk content used to compute its MD5
 * @param chunk          zero-based index of the chunk
 * @param chunks         total number of chunks
 * @param chunkStart     start offset of the chunk inside the complete file
 * @param chunkEnd       end offset of the chunk inside the complete file
 * @return 200 with body {@code 1} when the chunk exists, {@code 0} when it does not;
 *         500 with an error description when the check throws
 */
@ApiOperation(value = "检查分片是否存在")
@ApiResponses({@ApiResponse(code = 500, response = RestError.class, message = "错误")})
@GetMapping(value = "checkChunk/{md5}/{blockMd5}/{md5CheckLength}/{chunk}/{chunks}/{chunkStart}/{chunkEnd}")
public ResponseEntity<Integer> checkChunk(@ApiParam("文件md5码") @PathVariable String md5,
                                          @ApiParam("分块文件md5码") @PathVariable String blockMd5,
                                          @ApiParam("用来检测分块文件md5码的长度") @PathVariable long md5CheckLength,
                                          @ApiParam("第几个分块,从0开始") @PathVariable int chunk,
                                          @ApiParam("总分块数") @PathVariable int chunks,
                                          @ApiParam("分块开始位于的文件位置") @PathVariable long chunkStart,
                                          @ApiParam("分块结束位于的文件位置") @PathVariable long chunkEnd) {
    try {
        log.info("开始检验分片[{}]-[{}]的md5[{}],是否存在", chunk, chunks, blockMd5);
        return ResponseEntity.ok(this.fileServiceImpl.checkChunk(md5, blockMd5, md5CheckLength, chunk, chunks, chunkStart, chunkEnd) ? 1 : 0);
    } catch (Exception e) {
        log.error(e.getMessage(), e);
        return new ResponseEntity(RestError.DATABASE_ERROR.setReason(e.getMessage()).toString(), HttpStatus.INTERNAL_SERVER_ERROR);
    }
}

/**
 * Verifies that the chunk file exists, has length {@code chunkEnd - chunkStart} and that its
 * MD5 (computed by {@code FileUtils.md5} over {@code md5CheckLength}) equals {@code blockMd5}.
 *
 * @param md5            MD5 of the complete file, i.e. the chunk directory name
 * @param blockMd5       expected MD5 of the chunk
 * @param md5CheckLength length passed to {@code FileUtils.md5}
 * @param chunk          zero-based index of the chunk
 * @param chunks         total number of chunks
 * @param chunkStart     start offset of the chunk inside the complete file
 * @param chunkEnd       end offset of the chunk inside the complete file
 * @return {@code true} only when all three conditions hold; {@code false} also when
 *         {@code md5} is not a plain directory name
 */
@Override
public boolean checkChunk(String md5, String blockMd5, long md5CheckLength, int chunk, int chunks, long chunkStart, long chunkEnd) {
    boolean isExist = false;
    // md5 is a path variable taken from the URL; never let it navigate outside the temp dir.
    if (isSafePathSegment(md5)) {
        File chunkFile = new File(fileDir, TEMP_DIR + File.separator + md5 + File.separator + chunk + FILE_SEPARATOR + chunks + FILE_EXT);
        if (chunkFile.exists() && chunkFile.length() == (chunkEnd - chunkStart)) {
            String calBlockMd5 = FileUtils.md5(chunkFile, 0, md5CheckLength);
            if (blockMd5.equals(calBlockMd5)) {
                isExist = true;
            }
        }
    }
    log.info("{}的{}-{}分块{}存在", md5, chunk, chunks, isExist ? "" : "不");
    return isExist;
}
合并文件
/**
 * Merges the uploaded chunks of a file into one file.
 *
 * @param fileInfo      description of the file to merge
 * @param bindingResult validation result for {@code fileInfo}
 * @return 400 when validation failed; 200 with body {@code 1} when the service returned an
 *         entity and {@code 0} when it returned {@code null}; 500 with an error description
 *         when merging throws
 */
@ApiOperation(value = "合并文件", notes = "把分片上传的数据合并到一个文件")
@ApiResponses({@ApiResponse(code = 500, response = RestError.class, message = "错误")})
@PostMapping(value = "mergeChunks")
public ResponseEntity<Integer> mergeChunks(@Validated @RequestBody FileInfo fileInfo, BindingResult bindingResult) {
    log.info("开始合并文件");
    if (bindingResult.hasErrors()) {
        log.error("错误的参数请求");
        return new ResponseEntity("错误的参数请求", HttpStatus.BAD_REQUEST);
    }
    try {
        DataEntity dataEntity = this.fileServiceImpl.mergeChunks(fileInfo);
        log.info("合并文件完成, 保存的dataEntityId为:{}", dataEntity != null ? dataEntity.getId() : null);
        return ResponseEntity.ok(dataEntity != null ? 1 : 0);
    } catch (FileMargeException e) {
        log.error(e.getMessage(), e);
        return new ResponseEntity(RestError.FILE_MARGE_ERROR.setReason(e.getMessage()).toString(), HttpStatus.INTERNAL_SERVER_ERROR);
    } catch (FileNotAllException e) {
        log.error(e.getMessage(), e);
        return new ResponseEntity(RestError.FILE_NOTALL_ERROR.setReason(e.getMessage()).toString(), HttpStatus.INTERNAL_SERVER_ERROR);
    } catch (IOException e) {
        log.error(e.getMessage(), e);
        return new ResponseEntity(RestError.IO_ERROR.setReason(e.getMessage()).toString(), HttpStatus.INTERNAL_SERVER_ERROR);
    }
}

/**
 * Merges the chunks of a file into the permanent file, records the upload and creates the
 * directory entry (plus any intermediate directories named in {@code fileInfo.getPath()}).
 *
 * <p>Chunks are only merged when the permanent file is missing or has a length different from
 * {@code fileInfo.getSize()}. When an upload record for the MD5/size already exists it is
 * reused and no new record is saved.
 *
 * @param fileInfo description of the file to merge
 * @return the saved {@link DataEntity} for the file
 * @throws FileNotAllException when the chunks on disk add up to less than the expected size,
 *                             or to less than an already existing permanent file
 * @throws FileMargeException  when anything else fails while merging or cleaning up chunks
 * @throws IOException         when the MD5 or file name cannot be used as a plain file name
 */
@Override
public DataEntity mergeChunks(FileInfo fileInfo) throws IOException, FileNotAllException, FileMargeException {
    // Both values come from the request body and are turned into paths below; the MD5 also
    // names the directory that is deleted after merging, so "." / ".." must never get through.
    String wholeFileName = fileInfo.getMd5() + FILE_SEPARATOR + fileInfo.getName();
    if (!isSafePathSegment(fileInfo.getMd5()) || !isSafePathSegment(wholeFileName)) {
        throw new IOException("Illegal md5 or file name: " + wholeFileName);
    }

    // First check whether the database already has a record for this file
    Optional<UploadFileInfo> uploadFileInfoOptional = this.uploadFileDao.findByMd5AndSize(fileInfo.getMd5(), fileInfo.getSize());
    log.info("检查文件信息是否在数据库中存在");
    UploadFileInfo uploadFileInfo;
    if (uploadFileInfoOptional.isPresent()) {
        log.info("文件信息:{}", fileInfo);
        uploadFileInfo = uploadFileInfoOptional.get();
    } else {
        uploadFileInfo = new UploadFileInfo();
    }

    // Then check whether the permanent file itself exists
    log.info("检查真实文件");
    File wholeFile = new File(getRealFileRoot(), wholeFileName);
    if (!wholeFile.exists() || wholeFile.length() != fileInfo.getSize()) {
        log.info("文件不存在或者文件长度不符合! }");
        if (wholeFile.exists()) {
            log.info("长度为{}!={},", wholeFile.length(), fileInfo.getSize());
        }
        File tempDirFile = new File(fileDir, TEMP_DIR + File.separator + fileInfo.getMd5());
        try {
            if (tempDirFile.exists()) {
                log.info("文件分片目录存在");
                // Collect every chunk file in the directory
                File[] partFiles = tempDirFile.listFiles((f, name) -> name.endsWith(FILE_EXT));
                if (partFiles == null) {
                    // listFiles returns null on I/O error or when the path is not a directory
                    throw new IOException("Cannot list chunk directory " + tempDirFile);
                }
                log.info("文件分片个数为:{}", partFiles.length);
                if (partFiles.length > 0) {
                    // Shorter names first, then lexicographic: orders "<index><sep><total><ext>"
                    // numerically by index as long as <total> is the same for all chunks.
                    Arrays.sort(partFiles, (File f1, File f2) -> {
                        String name1 = f1.getName();
                        String name2 = f2.getName();
                        if (name1.length() != name2.length()) {
                            return name1.length() < name2.length() ? -1 : 1;
                        }
                        return name1.compareTo(name2);
                    });
                    appendChunks(partFiles, wholeFile, fileInfo.getSize());
                }
                log.info("删除分片数据信息");
                this.threadPoolUtil.getExecutor().execute(() -> {
                    File[] children = tempDirFile.listFiles();
                    if (children != null) {
                        for (File child : children) {
                            child.delete();
                        }
                    }
                    tempDirFile.delete();
                });
            }
        } catch (FileNotAllException e) {
            // Declared by this method and handled separately by the controller: do not wrap.
            throw e;
        } catch (Exception e) {
            // FileMargeException carries no cause here, so keep the original failure in the log.
            log.error(e.getMessage(), e);
            throw new FileMargeException();
        }
    }

    if (uploadFileInfo.getId() == null) {
        log.info("保存上传的文件信息");
        uploadFileInfo.setCreateTime(fileInfo.getCreateTime());
        uploadFileInfo.setMd5(fileInfo.getMd5());
        uploadFileInfo.setType(fileInfo.getType());
        uploadFileInfo.setSize(wholeFile.length());
        uploadFileInfo.setDfsPath(wholeFile.getAbsolutePath().substring(this.fileDir.length() + 1));
        this.uploadFileDao.save(uploadFileInfo);
    }

    // The file size should be updated once merging has finished
    log.info("获取父目录信息");
    DataEntity parent = this.getDataEntityById(fileInfo.getParentId());

    // When the file info carries a relative path, create the matching directory entries
    String path = fileInfo.getPath();
    if (StringUtils.hasText(path)) {
        log.info("包含相对目录,进行相对目录的创建");
        path = FilenameUtils.getFullPathNoEndSeparator(path);
        for (String tempPath : path.split("/")) {
            if (!StringUtils.hasText(tempPath)) {
                continue;
            }
            DataEntity directory = this.dataEntityDao.findByNameAndParentAndUserId(tempPath, parent, UserUtil.getUserId());
            if (directory == null) {
                directory = new DataEntity();
                directory.setName(tempPath);
                directory.setDir(true);
                directory.setParent(parent);
                parent = this.dataEntityDao.save(directory);
            } else {
                parent = directory;
            }
        }
    }

    log.info("创建目录信息");
    DataEntity dataEntity = new DataEntity();
    dataEntity.setName(fileInfo.getName());
    dataEntity.setExt(fileInfo.getExt());
    dataEntity.setDataType(fileInfo.getFileType());
    dataEntity.setFileInfo(uploadFileInfo);
    dataEntity.setParent(parent);
    dataEntity.setSize(uploadFileInfo.getSize());
    dataEntity = this.saveAndRenameFile(dataEntity);
    this.saveAndCreateFile(dataEntity);

    // Depending on the uploaded file's type, call the analysis interface
    String fileType = fileInfo.getFileType();
    if ("images".equals(fileType) || "vector".equals(fileType) || "terrain".equals(fileType) || "original".equals(fileType)) {
        String resultInfo = analysis(dataEntity, fileInfo);
        log.info("解析结果:{}", resultInfo);
    }
    return dataEntity;
}

/**
 * Appends the sorted chunk files to {@code wholeFile}, skipping whatever the file already holds.
 *
 * @param partFiles    chunk files in merge order
 * @param wholeFile    target file; created when missing, appended to otherwise
 * @param expectedSize size the complete file must reach
 * @throws FileNotAllException when the chunks add up to less than {@code expectedSize}, or to
 *                             less than the length of the target file after merging
 * @throws IOException         when reading a chunk or writing the target fails
 */
private void appendChunks(File[] partFiles, File wholeFile, long expectedSize) throws IOException, FileNotAllException {
    long totalPartBytes = 0;
    for (File partFile : partFiles) {
        totalPartBytes += partFile.length();
    }
    // Refuse to write a truncated file when some chunks are missing.
    if (totalPartBytes < expectedSize) {
        log.info("分片文件不完整");
        throw new FileNotAllException();
    }

    long mergedBytes = wholeFile.length(); // 0 when the file does not exist yet
    long partStart = 0;
    try (FileOutputStream wholeOut = new FileOutputStream(wholeFile, true);
         FileChannel resultFileChannel = wholeOut.getChannel()) {
        for (int i = 0; i < partFiles.length; i++) {
            long partLength = partFiles[i].length();
            long partEnd = partStart + partLength;
            if (partEnd > mergedBytes) {
                log.info("合并第{}块的文件{}", i, partFiles[i].getName());
                // Bytes of this chunk that the target already contains (non-zero only when a
                // previous merge stopped in the middle of this chunk).
                long alreadyMerged = Math.max(0L, mergedBytes - partStart);
                try (FileInputStream partIn = new FileInputStream(partFiles[i]);
                     FileChannel inChannel = partIn.getChannel()) {
                    inChannel.position(alreadyMerged);
                    long remaining = partLength - alreadyMerged;
                    // transferFrom may move fewer bytes than requested, so loop until done.
                    while (remaining > 0) {
                        long moved = resultFileChannel.transferFrom(inChannel, resultFileChannel.size(), remaining);
                        if (moved <= 0) {
                            throw new IOException("Unexpected end of chunk " + partFiles[i].getName());
                        }
                        remaining -= moved;
                    }
                }
                mergedBytes = partEnd;
            }
            partStart = partEnd;
        }
    }

    // A target that is longer than all chunks together was not produced from these chunks.
    if (totalPartBytes < wholeFile.length()) {
        log.info("分片文件不完整");
        throw new FileNotAllException();
    }
}
关于秒传功能,其实原理就是检验文件MD5,在一个文件上传前先获取文件内容MD5值或者部分取值MD5,然后在查找自己的记录是否已存在相同的MD5,如果存在就直接从服务器真实路径取,而不需要重新进行分片上传了,从而达到秒传的效果。
本文出自快速备案,转载时请注明出处及相应链接。