Skip to content

Commit

Permalink
Add Python ASR example with alsa (#324)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj committed Mar 8, 2024
1 parent 3c7724c commit 87899c2
Show file tree
Hide file tree
Showing 11 changed files with 227 additions and 4 deletions.
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
project(sherpa-ncnn)

set(SHERPA_NCNN_VERSION "2.1.9")
set(SHERPA_NCNN_VERSION "2.1.10")

# Disable warning about
#
Expand Down Expand Up @@ -106,6 +106,7 @@ if(SHERPA_NCNN_ENABLE_BINARY AND UNIX AND NOT APPLE)
include(CheckIncludeFileCXX)
check_include_file_cxx(alsa/asoundlib.h SHERPA_NCNN_HAS_ALSA)
if(SHERPA_NCNN_HAS_ALSA)
message(STATUS "With Alsa")
add_definitions(-DSHERPA_NCNN_ENABLE_ALSA=1)
elseif(UNIX AND NOT APPLE)
message(WARNING "\
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#!/usr/bin/env python3

# Real-time speech recognition from a microphone with sherpa-ncnn Python API
# with endpoint detection.
#
# Note: This script uses ALSA and works only on Linux systems, especially
# for embedded Linux systems and for running Linux on Windows using WSL.
#
# Please refer to
# https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html
# to download pre-trained models

import argparse
import sys

import sherpa_ncnn


def get_args(argv=None):
    """Parse the command-line options of this example.

    Args:
      argv: Optional list of argument strings to parse instead of the
        process command line. The default, ``None``, makes argparse read
        ``sys.argv[1:]``, which is what the script entry point relies on.

    Returns:
      An ``argparse.Namespace`` whose ``device_name`` attribute holds the
      value given to the required ``--device-name`` option.
    """
    # RawTextHelpFormatter keeps the line breaks of the help text below, so
    # the sample ``arecord -l`` output is displayed as written instead of
    # being re-wrapped into a single paragraph. The only option is required
    # and has no default, so there is no default value worth displaying.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter
    )

    parser.add_argument(
        "--device-name",
        type=str,
        required=True,
        help="""
The device name specifies which microphone to use in case there are several
on your system. You can use
arecord -l
to find all available microphones on your computer. For instance, if it outputs
**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
Subdevices: 1/1
Subdevice #0: subdevice #0
and if you want to select card 3 and the device 0 on that card, please use:
plughw:3,0
as the device_name.
""",
    )

    return parser.parse_args(argv)


def create_recognizer():
    """Build the ``sherpa_ncnn.Recognizer`` used by this example.

    The model files are looked up relative to the current working directory.
    The recognizer is configured with four threads, the
    ``modified_beam_search`` decoding method, endpoint detection enabled and
    no hotwords file.

    Returns:
      The constructed ``sherpa_ncnn.Recognizer``.
    """
    # Please replace the model files if needed.
    # See https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html
    # for download links.
    model_files = {
        "tokens": "./sherpa-ncnn-conv-emformer-transducer-2022-12-06/tokens.txt",
        "encoder_param": "./sherpa-ncnn-conv-emformer-transducer-2022-12-06/encoder_jit_trace-pnnx.ncnn.param",
        "encoder_bin": "./sherpa-ncnn-conv-emformer-transducer-2022-12-06/encoder_jit_trace-pnnx.ncnn.bin",
        "decoder_param": "./sherpa-ncnn-conv-emformer-transducer-2022-12-06/decoder_jit_trace-pnnx.ncnn.param",
        "decoder_bin": "./sherpa-ncnn-conv-emformer-transducer-2022-12-06/decoder_jit_trace-pnnx.ncnn.bin",
        "joiner_param": "./sherpa-ncnn-conv-emformer-transducer-2022-12-06/joiner_jit_trace-pnnx.ncnn.param",
        "joiner_bin": "./sherpa-ncnn-conv-emformer-transducer-2022-12-06/joiner_jit_trace-pnnx.ncnn.bin",
    }

    # Decoding and endpointing options, passed through as keyword arguments.
    options = {
        "num_threads": 4,
        "decoding_method": "modified_beam_search",
        "enable_endpoint_detection": True,
        "rule1_min_trailing_silence": 2.4,
        "rule2_min_trailing_silence": 1.2,
        "rule3_min_utterance_length": 300,
        "hotwords_file": "",
        "hotwords_score": 1.5,
    }

    return sherpa_ncnn.Recognizer(**model_files, **options)


def main():
    """Recognize speech from an ALSA capture device until interrupted.

    Reads audio in 100 ms chunks from the device named by ``--device-name``,
    feeds it to the recognizer and keeps rewriting the current line with the
    partial result. When the recognizer reports an endpoint, a non-empty
    result is finalized with a newline and the segment counter advances; the
    recognizer is reset either way. The loop never returns on its own.
    """
    device_name = get_args().device_name
    print(f"device_name: {device_name}")
    alsa = sherpa_ncnn.Alsa(device_name)

    recognizer = create_recognizer()
    print("Started! Please speak")
    sample_rate = recognizer.sample_rate
    chunk_size = int(0.1 * sample_rate)  # 0.1 second = 100 ms
    previous_text = ""
    segment_index = 0

    while True:
        chunk = alsa.read(chunk_size)  # a blocking read
        recognizer.accept_waveform(sample_rate, chunk)

        # Query the endpoint flag before the text, as both are read once per
        # chunk and used together below.
        endpoint_reached = recognizer.is_endpoint
        text = recognizer.text

        # Only redraw the line when the partial result actually changed.
        if text and text != previous_text:
            previous_text = text
            print("\r{}:{}".format(segment_index, text), end="", flush=True)

        if not endpoint_reached:
            continue

        if text:
            # Finalize the segment: same line, but terminated by a newline.
            print("\r{}:{}".format(segment_index, text), flush=True)
            segment_index += 1
        recognizer.reset()


if __name__ == "__main__":
    # main() loops forever; Ctrl + C is the intended way to stop the script,
    # so report it with a short message instead of a traceback.
    try:
        main()
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exiting")
2 changes: 1 addition & 1 deletion sherpa-ncnn/csrc/alsa.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
and if you want to select card 3 and the device 0 on that card, please use:
hw:3,0
plughw:3,0
)";

Expand Down
2 changes: 1 addition & 1 deletion sherpa-ncnn/csrc/sherpa-ncnn-alsa.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
and if you want to select card 3 and the device 0 on that card, please use:
hw:3,0
plughw:3,0
as the device_name.
)usage";
Expand Down
1 change: 1 addition & 0 deletions sherpa-ncnn/csrc/stream.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "sherpa-ncnn/csrc/stream.h"

#include <iostream>
#include <utility>

namespace sherpa_ncnn {

Expand Down
13 changes: 13 additions & 0 deletions sherpa-ncnn/python/csrc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ set(srcs
stream.cc
)

# Choose the implementation behind the Python `Alsa` class.
# With ALSA headers available (SHERPA_NCNN_HAS_ALSA), compile the core ALSA
# wrapper together with its pybind11 binding (alsa.cc in this directory).
# Otherwise compile faked-alsa.cc, which binds a placeholder class under the
# same Python name.
# NOTE(review): csrc/alsa.cc is added here directly, presumably because it is
# not part of sherpa-ncnn-core -- confirm against sherpa-ncnn/csrc.
if(SHERPA_NCNN_HAS_ALSA)
list(APPEND srcs ${CMAKE_SOURCE_DIR}/sherpa-ncnn/csrc/alsa.cc alsa.cc)
else()
list(APPEND srcs faked-alsa.cc)
endif()

pybind11_add_module(_sherpa_ncnn ${srcs})
target_link_libraries(_sherpa_ncnn PRIVATE sherpa-ncnn-core)

Expand All @@ -28,6 +34,13 @@ if(NOT WIN32)
target_link_libraries(_sherpa_ncnn PRIVATE "-Wl,-rpath,${SHERPA_NCNN_RPATH_ORIGIN}/sherpa_ncnn/lib")
endif()

# Link the extension module against libasound when ALSA support is enabled.
# If the environment variable SHERPA_NCNN_ALSA_LIB_DIR is set at configure
# time, its value is passed to the linker as an extra library search path
# (-L...) so that libasound can be found in a non-default location.
if(SHERPA_NCNN_HAS_ALSA)
if(DEFINED ENV{SHERPA_NCNN_ALSA_LIB_DIR})
target_link_libraries(_sherpa_ncnn PRIVATE -L$ENV{SHERPA_NCNN_ALSA_LIB_DIR} -lasound)
else()
target_link_libraries(_sherpa_ncnn PRIVATE asound)
endif()
endif()

install(TARGETS _sherpa_ncnn
DESTINATION ../
Expand Down
30 changes: 30 additions & 0 deletions sherpa-ncnn/python/csrc/alsa.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// sherpa-ncnn/python/csrc/alsa.cc
//
// Copyright (c) 2024 Xiaomi Corporation

#include "sherpa-ncnn/python/csrc/alsa.h"

#include <vector>

#include "sherpa-ncnn/csrc/alsa.h"

namespace sherpa_ncnn {

// Registers sherpa_ncnn::Alsa in module `m` under the Python name "Alsa".
//
// Python surface:
//   Alsa(device_name)      - constructor taking the device name
//   read(num_samples)      - returns the samples as a list of floats
//   expected_sample_rate   - read-only property
//   actual_sample_rate     - read-only property
//
// The constructor and read() run with the GIL released, so other Python
// threads can make progress while they execute.
void PybindAlsa(py::module *m) {
  using PyClass = Alsa;

  // Return the samples by value so that pybind11 converts a vector owned by
  // the call itself.
  auto read_samples = [](PyClass &self,
                         int32_t num_samples) -> std::vector<float> {
    return self.Read(num_samples);
  };

  py::class_<PyClass> alsa(*m, "Alsa");

  alsa.def(py::init<const char *>(), py::arg("device_name"),
           py::call_guard<py::gil_scoped_release>());

  alsa.def("read", read_samples, py::arg("num_samples"),
           py::call_guard<py::gil_scoped_release>());

  alsa.def_property_readonly("expected_sample_rate",
                             &PyClass::GetExpectedSampleRate);
  alsa.def_property_readonly("actual_sample_rate",
                             &PyClass::GetActualSampleRate);
}

} // namespace sherpa_ncnn
16 changes: 16 additions & 0 deletions sherpa-ncnn/python/csrc/alsa.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// sherpa-ncnn/python/csrc/alsa.h
//
// Copyright (c) 2024 Xiaomi Corporation

#ifndef SHERPA_NCNN_PYTHON_CSRC_ALSA_H_
#define SHERPA_NCNN_PYTHON_CSRC_ALSA_H_

#include "sherpa-ncnn/python/csrc/sherpa-ncnn.h"

namespace sherpa_ncnn {

// Registers a class named "Alsa" in the pybind11 module `m`.
//
// Exactly one definition is compiled into the extension: alsa.cc binds the
// real sherpa_ncnn::Alsa, while faked-alsa.cc binds a placeholder for builds
// without ALSA support. The choice is made in this directory's
// CMakeLists.txt based on SHERPA_NCNN_HAS_ALSA.
void PybindAlsa(py::module *m);

} // namespace sherpa_ncnn

#endif // SHERPA_NCNN_PYTHON_CSRC_ALSA_H_
47 changes: 47 additions & 0 deletions sherpa-ncnn/python/csrc/faked-alsa.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// sherpa-ncnn/python/csrc/faked-alsa.cc
//
// Copyright (c) 2024 Xiaomi Corporation

// Placeholder implementation of the Python `Alsa` class for builds without
// ALSA support. It keeps `sherpa_ncnn.Alsa` importable on every platform;
// constructing it reports the problem and terminates the process.

#include "sherpa-ncnn/python/csrc/alsa.h"

#include <cstdint>
#include <cstdlib>
#include <vector>

#include "sherpa-ncnn/csrc/macros.h"

namespace sherpa_ncnn {

// Stand-in for sherpa_ncnn::Alsa with the same member functions as used by
// the binding below.
class FakedAlsa {
 public:
  // Logs why ALSA is unavailable and exits the process with status -1.
  // The device name is ignored.
  explicit FakedAlsa(const char *) {
    SHERPA_NCNN_LOGE("This function is for Linux only.");
    // On Unix-like systems the missing piece is the ALSA development
    // package, so tell the user how to get a build with ALSA enabled.
    // An undefined SHERPA_NCNN_ENABLE_ALSA evaluates to 0 in #if.
#if (SHERPA_NCNN_ENABLE_ALSA == 0) && (defined(__unix__) || defined(__unix))
    SHERPA_NCNN_LOGE(R"doc(
sherpa-ncnn is compiled without alsa support. To enable that, please run
(1) sudo apt-get install alsa-utils libasound2-dev
(2) rebuild sherpa-ncnn
)doc");
#endif
    std::exit(-1);
  }

  // The members below exist only so that the binding compiles; no instance
  // can be obtained from Python because the constructor never returns.
  std::vector<float> Read(int32_t) const { return {}; }
  int32_t GetExpectedSampleRate() const { return -1; }
  int32_t GetActualSampleRate() const { return -1; }
};

// Registers FakedAlsa in module `m` under the Python name "Alsa", with the
// same constructor argument, method and read-only properties as the real
// binding in alsa.cc.
void PybindAlsa(py::module *m) {
  using PyClass = FakedAlsa;
  py::class_<PyClass>(*m, "Alsa")
      .def(py::init<const char *>(), py::arg("device_name"))
      .def(
          "read",
          [](PyClass &self, int32_t num_samples) -> std::vector<float> {
            return self.Read(num_samples);
          },
          py::arg("num_samples"), py::call_guard<py::gil_scoped_release>())
      .def_property_readonly("expected_sample_rate",
                             &PyClass::GetExpectedSampleRate)
      .def_property_readonly("actual_sample_rate",
                             &PyClass::GetActualSampleRate);
}

}  // namespace sherpa_ncnn
3 changes: 3 additions & 0 deletions sherpa-ncnn/python/csrc/sherpa-ncnn.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include "sherpa-ncnn/python/csrc/sherpa-ncnn.h"

#include "sherpa-ncnn/python/csrc/alsa.h"
#include "sherpa-ncnn/python/csrc/decoder.h"
#include "sherpa-ncnn/python/csrc/display.h"
#include "sherpa-ncnn/python/csrc/endpoint.h"
Expand All @@ -39,6 +40,8 @@ PYBIND11_MODULE(_sherpa_ncnn, m) {
PybindRecognizer(&m);

PybindDisplay(&m);

PybindAlsa(&m);
}

} // namespace sherpa_ncnn
3 changes: 2 additions & 1 deletion sherpa-ncnn/python/sherpa_ncnn/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from _sherpa_ncnn import Alsa, Display

from .recognizer import Recognizer
from _sherpa_ncnn import Display

0 comments on commit 87899c2

Please sign in to comment.