博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
从wav2letter中提取语音属性的代码
阅读量:2121 次
发布时间:2019-04-30

本文共 6242 字,大约阅读时间需要 20 分钟。

从wav2letter中提取语音属性的代码

前文中已经对比了三种语音识别平台关于语音特性提取的代码框架和大致结构,因为wav2letter的代码整洁易懂,同时又调用了cblas和fftw两个外部的库提高矩阵和fft的运行效率,比较适合做进一步的分析和学习。

代码提取

语音特征提取的代码在/src/libraries/feature目录,目录内的文件如下:

-rw-rw-r-- 1 1251 9月  18 15:51 Ceplifter.cpp
-rw-rw-r-- 1  841 9月  18 15:51 Ceplifter.h
-rw-rw-r-- 1 1392 9月  18 15:51 CMakeLists.txt
-rw-rw-r-- 1  949 9月  18 15:51 Dct.cpp
-rw-rw-r-- 1  781 9月  18 15:51 Dct.h
-rw-rw-r-- 1 2669 9月  18 15:51 Derivatives.cpp
-rw-rw-r-- 1 1016 9月  18 15:51 Derivatives.h
-rw-rw-r-- 1  854 9月  18 15:51 Dither.cpp
-rw-rw-r-- 1  991 9月  18 15:51 Dither.h
-rw-rw-r-- 1 4519 9月  18 15:51 FeatureParams.h
-rw-rw-r-- 1 2284 9月  18 15:51 Mfcc.cpp
-rw-rw-r-- 1 2420 9月  18 15:51 Mfcc.h
-rw-rw-r-- 1 3475 9月  18 15:51 Mfsc.cpp
-rw-rw-r-- 1 1121 9月  18 15:51 Mfsc.h
-rw-rw-r-- 1 4742 9月  18 15:51 PowerSpectrum.cpp
-rw-rw-r-- 1 1603 9月  18 15:51 PowerSpectrum.h
-rw-rw-r-- 1 1488 9月  18 15:51 PreEmphasis.cpp
-rw-rw-r-- 1  688 9月  18 15:51 PreEmphasis.h
-rw-rw-r-- 1 2514 9月  18 15:51 SpeechUtils.cpp
-rw-rw-r-- 1  638 9月  18 15:51 SpeechUtils.h
-rw-rw-r-- 1 2979 9月  18 15:51 TriFilterbank.cpp
-rw-rw-r-- 1 1322 9月  18 15:51 TriFilterbank.h
-rw-rw-r-- 1 1631 9月  18 15:51 Windowing.cpp
-rw-rw-r-- 1  730 9月  18 15:51 Windowing.h

现在将feature目录cp到一个单独的文件夹下面,然后利用CMake尝试编译一个独立的so文件。

cp -r feature/  ~/devpath/
cd ~/devpath
mv feature/ speech_feature/
cd speech_feature
mkdir build && cd build
cmake ..

结果肯定是报错,错误信息如下:

-- look for w2l_libraries_use_Mkl:$(W2L_LIBRARIES_USE_MKL) (include: , library: )
CMake Error at CMakeLists.txt:14 (find_package):
  By not providing "FindCBLAS.cmake" in CMAKE_MODULE_PATH this project has
  asked CMake to find a package configuration file provided by "CBLAS", but
  CMake did not find one.
  Could not find a package configuration file provided by "CBLAS" with any of
  the following names:
    CBLASConfig.cmake
    cblas-config.cmake
  Add the installation prefix of "CBLAS" to CMAKE_PREFIX_PATH or set
  "CBLAS_DIR" to a directory containing one of the above files.  If "CBLAS"
  provides a separate development package or SDK, be sure it has been
  installed.
-- Configuring incomplete, errors occurred!

利用CMAKE工具完成移植

现在来看一下CMakeLists.txt怎么写的。

# Build script for the feature-extraction sources in this directory, quoted as
# shipped with wav2letter.
cmake_minimum_required(VERSION 3.5.1)

# ----------------------------- Dependencies -----------------------------

# CBLAS
# When W2L_LIBRARIES_USE_MKL is set, MKL provides the CBLAS library and include
# directory; otherwise a CBLAS package is located through find_package().
if (W2L_LIBRARIES_USE_MKL)
  find_package(MKL REQUIRED)
  set(CBLAS_LIBRARIES ${MKL_LIBRARIES})
  set(CBLAS_INCLUDE_DIR ${MKL_INCLUDE_DIR})
else ()
  find_package(CBLAS REQUIRED)
endif ()
message(STATUS "CBLAS found (include: ${CBLAS_INCLUDE_DIR}, library: ${CBLAS_LIBRARIES})")

# FFTW
# REQUIRED already stops configuration when FFTW is missing, so the else()
# branch below only acts as an extra safeguard.
find_package(FFTW REQUIRED)
if (FFTW_FOUND)
  message(STATUS "FFTW found")
else()
  message(FATAL_ERROR "FFTW not found")
endif()

# ----------------------------- Lib -----------------------------
# An INTERFACE library produces no build artifact of its own: the sources,
# link libraries and include directories attached below are handed to whatever
# target links against feature-library.
add_library(
  feature-library
  INTERFACE
  )

# Source files passed on to consumers of feature-library.
target_sources(
  feature-library
  INTERFACE
  ${CMAKE_CURRENT_SOURCE_DIR}/Ceplifter.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/Dct.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/Derivatives.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/Dither.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/Mfcc.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/Mfsc.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/PowerSpectrum.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/PreEmphasis.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/SpeechUtils.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/TriFilterbank.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/Windowing.cpp
  )

# Consumers link against the CBLAS and FFTW libraries found above.
target_link_libraries(
  feature-library
  INTERFACE
  ${CBLAS_LIBRARIES}
  ${FFTW_LIBRARIES}
  )

# NOTE(review): src/libraries/feature is a relative path; presumably it is
# meant relative to the wav2letter repository root - confirm before reusing
# this file outside that tree.
target_include_directories(
  feature-library
  INTERFACE
  src/libraries/feature
  ${CBLAS_INCLUDE_DIR}
  )

find_package是CMake的一个功能,它借助Find*.cmake模块文件来自动查找工程依赖。wav2letter根目录下的cmake目录里放着我们需要的FindCBLAS.cmake和FindFFTW.cmake,所以把它们copy到我们要建立的工程的cmake目录(该目录需要加入CMAKE_MODULE_PATH,find_package才能找到这些模块)。

cp FindCBLAS.cmake ../speech_feature/cmake/
cp FindFFTW.cmake ../../speech_feature/cmake/

注释掉关于mkl等的判断,再次执行cmake不会报错了,但make也没有反应。这是因为当前的add_library用了INTERFACE关键字:INTERFACE库不会直接创建和编译目标文件,这个库可以像其他库一样作为target_link_libraries()命令的参数。既然不符合要求,我们干脆重写,改为生成共享库的目标方式:

# Translation units that make up the standalone speech feature library.
set(SPEECH_FEAT_SRCS
  ${CMAKE_CURRENT_SOURCE_DIR}/Ceplifter.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/Dct.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/Derivatives.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/Dither.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/Mfcc.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/Mfsc.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/PowerSpectrum.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/PreEmphasis.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/SpeechUtils.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/TriFilterbank.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/Windowing.cpp
)

# SHARED makes this a real build target, so `make` compiles the sources above
# into a shared object instead of only forwarding them to consumers.
# NOTE(review): the INTERFACE target this replaces linked ${CBLAS_LIBRARIES}
# and ${FFTW_LIBRARIES}; confirm speech_feat links them as well.
add_library(speech_feat SHARED ${SPEECH_FEAT_SRCS})

这样编译能进行了,但是报了编译错误

/home/changshengwu/devpath/machinelearning/wav2letter/speech_feature/Ceplifter.cpp: In constructor ‘w2l::Ceplifter<T>::Ceplifter(int64_t, int64_t)’:
/home/changshengwu/devpath/machinelearning/wav2letter/speech_feature/Ceplifter.cpp:21:3: error: ‘iota’ is not a member of ‘std’
   std::iota(coefs_.begin(), coefs_.end(), 0.0);
   ^
/home/changshengwu/devpath/machinelearning/wav2letter/speech_feature/Ceplifter.cpp:22:14: error: ISO C++ forbids declaration of ‘c’ with no type [-fpermissive]
   for (auto& c : coefs_) {
              ^
/home/changshengwu/devpath/machinelearning/wav2letter/speech_feature/Ceplifter.cpp:22:18: warning: range-based ‘for’ loops only available with -std=c++11 or -std=gnu++11
   for (auto& c : coefs_) {
                  ^
/home/changshengwu/devpath/machinelearning/wav2letter/speech_feature/Ceplifter.cpp: In member function ‘std::vector<T> w2l::Ceplifter<T>::apply(const std::vector<T>&) const’:
/home/changshengwu/devpath/machinelearning/wav2letter/speech_feature/Ceplifter.cpp:29:20: error: ISO C++ forbids declaration of ‘output’ with no type [-fpermissive]
   auto output(input);
                    ^
.......

这显然是C++编译标准的问题,那么尝试从wav2letter的根cmake文件中寻找C++标准的指定方式。

# Build every target as C++11 ...
set(CMAKE_CXX_STANDARD 11)
# ... and fail at configure time if the compiler cannot provide that standard.
set(CMAKE_CXX_STANDARD_REQUIRED ON)

这样重新执行cmake之后,输出中可以看到所识别的C和C++编译器。

-- The C compiler identification is GNU 5.4.0
-- The CXX compiler identification is GNU 5.4.0
-- Check for working C compiler: /usr/bin/cc
-- Check for working C compiler: /usr/bin/cc -- works

在build目录里生成了.so文件

lrwxrwxrwx 1  21 10月  9 15:25 libspeech_feat.so -> libspeech_feat.so.0.1*
-rwxrwxr-x 1  214744 10月  9 15:25 libspeech_feat.so.0.1*

后续又将gtest提取出来一并编译,这样就可以在linux系统上独立编译出一个自测程序。

nm:

通过gnu开源gcc或者g++工具生成的目标文件(object file),可以用nm、objdump和readelf这三个命令来查看。还可以用man nm来仔细阅读帮助。这个命令帮助分析.so文件很好用。
CTEST&GTEST:
两个自测框架,应该深入学习和研究。

转载地址:http://fuyrf.baihongyu.com/

你可能感兴趣的文章
C#如何把html中的相对路径变成绝对路径
查看>>
用C#编写发手机中文短信息Windows服务
查看>>
C#的四个基本技巧
查看>>
编程实例 使用C#的BitmapData
查看>>
区分Oracle和SQL Server常用函数调用方法
查看>>
详解Visual C#数据库基本编程
查看>>
第一个C#应用程序
查看>>
第一章C#简介
查看>>
NGWS runtime 技术基础
查看>>
Linux find 文件查询 用法示例
查看>>
Linux 查看文件大小
查看>>
mysql 命令
查看>>
MySQL执行外部sql脚本文件的命令
查看>>
解决MySql Error Code: 2006
查看>>
查看mysql数据库和表所占用空间
查看>>
Guava Collections使用介绍
查看>>
Ordering犀利的比较器
查看>>
spring+Mybatis+Ehcache整合
查看>>
google guava使用例子/示范(一)
查看>>
joda-time 时间API
查看>>