tencentcloud.CiMediaSpeechRecognitionTemplate
Provides a resource to create a CI media_speech_recognition_template.
Example Usage
import * as pulumi from "@pulumi/pulumi";
import * as tencentcloud from "@pulumi/tencentcloud";
const mediaSpeechRecognitionTemplate = new tencentcloud.CiMediaSpeechRecognitionTemplate("mediaSpeechRecognitionTemplate", {
bucket: "terraform-ci-1308919341",
speechRecognition: {
channelNum: "1",
convertNumMode: "0",
engineModelType: "16k_zh",
filterDirty: "0",
filterModal: "1",
filterPunc: "0",
outputFileType: "txt",
resTextFormat: "1",
speakerDiarization: "1",
speakerNumber: "0",
},
});
import pulumi
import pulumi_tencentcloud as tencentcloud
media_speech_recognition_template = tencentcloud.CiMediaSpeechRecognitionTemplate("mediaSpeechRecognitionTemplate",
bucket="terraform-ci-1308919341",
speech_recognition={
"channel_num": "1",
"convert_num_mode": "0",
"engine_model_type": "16k_zh",
"filter_dirty": "0",
"filter_modal": "1",
"filter_punc": "0",
"output_file_type": "txt",
"res_text_format": "1",
"speaker_diarization": "1",
"speaker_number": "0",
})
package main
import (
"github.com/pulumi/pulumi-terraform-provider/sdks/go/tencentcloud/tencentcloud"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := tencentcloud.NewCiMediaSpeechRecognitionTemplate(ctx, "mediaSpeechRecognitionTemplate", &tencentcloud.CiMediaSpeechRecognitionTemplateArgs{
Bucket: pulumi.String("terraform-ci-1308919341"),
SpeechRecognition: &tencentcloud.CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs{
ChannelNum: pulumi.String("1"),
ConvertNumMode: pulumi.String("0"),
EngineModelType: pulumi.String("16k_zh"),
FilterDirty: pulumi.String("0"),
FilterModal: pulumi.String("1"),
FilterPunc: pulumi.String("0"),
OutputFileType: pulumi.String("txt"),
ResTextFormat: pulumi.String("1"),
SpeakerDiarization: pulumi.String("1"),
SpeakerNumber: pulumi.String("0"),
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Tencentcloud = Pulumi.Tencentcloud;
return await Deployment.RunAsync(() =>
{
var mediaSpeechRecognitionTemplate = new Tencentcloud.CiMediaSpeechRecognitionTemplate("mediaSpeechRecognitionTemplate", new()
{
Bucket = "terraform-ci-1308919341",
SpeechRecognition = new Tencentcloud.Inputs.CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs
{
ChannelNum = "1",
ConvertNumMode = "0",
EngineModelType = "16k_zh",
FilterDirty = "0",
FilterModal = "1",
FilterPunc = "0",
OutputFileType = "txt",
ResTextFormat = "1",
SpeakerDiarization = "1",
SpeakerNumber = "0",
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.tencentcloud.CiMediaSpeechRecognitionTemplate;
import com.pulumi.tencentcloud.CiMediaSpeechRecognitionTemplateArgs;
import com.pulumi.tencentcloud.inputs.CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var mediaSpeechRecognitionTemplate = new CiMediaSpeechRecognitionTemplate("mediaSpeechRecognitionTemplate", CiMediaSpeechRecognitionTemplateArgs.builder()
.bucket("terraform-ci-1308919341")
.speechRecognition(CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs.builder()
.channelNum("1")
.convertNumMode("0")
.engineModelType("16k_zh")
.filterDirty("0")
.filterModal("1")
.filterPunc("0")
.outputFileType("txt")
.resTextFormat("1")
.speakerDiarization("1")
.speakerNumber("0")
.build())
.build());
}
}
resources:
mediaSpeechRecognitionTemplate:
type: tencentcloud:CiMediaSpeechRecognitionTemplate
properties:
bucket: terraform-ci-1308919341
speechRecognition:
channelNum: '1'
convertNumMode: '0'
engineModelType: 16k_zh
filterDirty: '0'
filterModal: '1'
filterPunc: '0'
outputFileType: txt
resTextFormat: '1'
speakerDiarization: '1'
speakerNumber: '0'
Create CiMediaSpeechRecognitionTemplate Resource
Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.
Constructor syntax
new CiMediaSpeechRecognitionTemplate(name: string, args: CiMediaSpeechRecognitionTemplateArgs, opts?: CustomResourceOptions);
@overload
def CiMediaSpeechRecognitionTemplate(resource_name: str,
args: CiMediaSpeechRecognitionTemplateArgs,
opts: Optional[ResourceOptions] = None)
@overload
def CiMediaSpeechRecognitionTemplate(resource_name: str,
opts: Optional[ResourceOptions] = None,
bucket: Optional[str] = None,
speech_recognition: Optional[CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs] = None,
ci_media_speech_recognition_template_id: Optional[str] = None,
name: Optional[str] = None)
func NewCiMediaSpeechRecognitionTemplate(ctx *Context, name string, args CiMediaSpeechRecognitionTemplateArgs, opts ...ResourceOption) (*CiMediaSpeechRecognitionTemplate, error)
public CiMediaSpeechRecognitionTemplate(string name, CiMediaSpeechRecognitionTemplateArgs args, CustomResourceOptions? opts = null)
public CiMediaSpeechRecognitionTemplate(String name, CiMediaSpeechRecognitionTemplateArgs args)
public CiMediaSpeechRecognitionTemplate(String name, CiMediaSpeechRecognitionTemplateArgs args, CustomResourceOptions options)
type: tencentcloud:CiMediaSpeechRecognitionTemplate
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.
Parameters
- name string
- The unique name of the resource.
- args CiMediaSpeechRecognitionTemplateArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- resource_name str
- The unique name of the resource.
- args CiMediaSpeechRecognitionTemplateArgs
- The arguments to resource properties.
- opts ResourceOptions
- Bag of options to control resource's behavior.
- ctx Context
- Context object for the current deployment.
- name string
- The unique name of the resource.
- args CiMediaSpeechRecognitionTemplateArgs
- The arguments to resource properties.
- opts ResourceOption
- Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args CiMediaSpeechRecognitionTemplateArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- name String
- The unique name of the resource.
- args CiMediaSpeechRecognitionTemplateArgs
- The arguments to resource properties.
- options CustomResourceOptions
- Bag of options to control resource's behavior.
CiMediaSpeechRecognitionTemplate Resource Properties
To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.
Inputs
In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.
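For example, the speech_recognition input from Example Usage can also be built with the typed argument class. This is a minimal sketch (the bucket value is the placeholder from the example above, and it assumes the CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs class is exported at the package top level, as referenced in the constructor signature above):
import pulumi_tencentcloud as tencentcloud
# Same template as the dictionary-literal example, using the typed args class.
template = tencentcloud.CiMediaSpeechRecognitionTemplate("mediaSpeechRecognitionTemplate",
    bucket="terraform-ci-1308919341",
    speech_recognition=tencentcloud.CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs(
        channel_num="1",
        engine_model_type="16k_zh",
        output_file_type="txt",
        res_text_format="1"))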
The CiMediaSpeechRecognitionTemplate resource accepts the following input properties:
- Bucket string
- bucket name.
- SpeechRecognition CiMediaSpeechRecognitionTemplateSpeechRecognition
- audio configuration.
- CiMediaSpeechRecognitionTemplateId string
- ID of the resource.
- Name string
- The template name only supports Chinese, English, numbers, _, - and *.
- Bucket string
- bucket name.
- SpeechRecognition CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs
- audio configuration.
- CiMediaSpeechRecognitionTemplateId string
- ID of the resource.
- Name string
- The template name only supports Chinese, English, numbers, _, - and *.
- bucket String
- bucket name.
- speechRecognition CiMediaSpeechRecognitionTemplateSpeechRecognition
- audio configuration.
- ciMediaSpeechRecognitionTemplateId String
- ID of the resource.
- name String
- The template name only supports Chinese, English, numbers, _, - and *.
- bucket string
- bucket name.
- speechRecognition CiMediaSpeechRecognitionTemplateSpeechRecognition
- audio configuration.
- ciMediaSpeechRecognitionTemplateId string
- ID of the resource.
- name string
- The template name only supports Chinese, English, numbers, _, - and *.
- bucket str
- bucket name.
- speech_recognition CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs
- audio configuration.
- ci_media_speech_recognition_template_id str
- ID of the resource.
- name str
- The template name only supports Chinese, English, numbers, _, - and *.
- bucket String
- bucket name.
- speechRecognition Property Map
- audio configuration.
- ciMediaSpeechRecognitionTemplateId String
- ID of the resource.
- name String
- The template name only supports Chinese, English, numbers, _, - and *.
Outputs
All input properties are implicitly available as output properties. Additionally, the CiMediaSpeechRecognitionTemplate resource produces the following output properties:
- Id string
- The provider-assigned unique ID for this managed resource.
- Id string
- The provider-assigned unique ID for this managed resource.
- id String
- The provider-assigned unique ID for this managed resource.
- id string
- The provider-assigned unique ID for this managed resource.
- id str
- The provider-assigned unique ID for this managed resource.
- id String
- The provider-assigned unique ID for this managed resource.
Look up Existing CiMediaSpeechRecognitionTemplate Resource
Get an existing CiMediaSpeechRecognitionTemplate resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.
public static get(name: string, id: Input<ID>, state?: CiMediaSpeechRecognitionTemplateState, opts?: CustomResourceOptions): CiMediaSpeechRecognitionTemplate
@staticmethod
def get(resource_name: str,
id: str,
opts: Optional[ResourceOptions] = None,
bucket: Optional[str] = None,
ci_media_speech_recognition_template_id: Optional[str] = None,
name: Optional[str] = None,
speech_recognition: Optional[CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs] = None) -> CiMediaSpeechRecognitionTemplate
func GetCiMediaSpeechRecognitionTemplate(ctx *Context, name string, id IDInput, state *CiMediaSpeechRecognitionTemplateState, opts ...ResourceOption) (*CiMediaSpeechRecognitionTemplate, error)
public static CiMediaSpeechRecognitionTemplate Get(string name, Input<string> id, CiMediaSpeechRecognitionTemplateState? state, CustomResourceOptions? opts = null)
public static CiMediaSpeechRecognitionTemplate get(String name, Output<String> id, CiMediaSpeechRecognitionTemplateState state, CustomResourceOptions options)
resources:
  _:
    type: tencentcloud:CiMediaSpeechRecognitionTemplate
    get:
      id: ${id}
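A minimal lookup sketch in Python (the resource name and ID are placeholders; the ID is assumed to follow the bucket#templateId form described in the Import section below):
import pulumi
import pulumi_tencentcloud as tencentcloud
# Look up an existing template by its provider ID and re-export its name.
existing = tencentcloud.CiMediaSpeechRecognitionTemplate.get("existingTemplate",
    "terraform-ci-1308919341#t1d794430f2f1f4350b11e905ce2c6167e")
pulumi.export("templateName", existing.name)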
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- resource_name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- Bucket string
- bucket name.
- CiMediaSpeechRecognitionTemplateId string
- ID of the resource.
- Name string
- The template name only supports Chinese, English, numbers, _, - and *.
- SpeechRecognition CiMediaSpeechRecognitionTemplateSpeechRecognition
- audio configuration.
- Bucket string
- bucket name.
- CiMediaSpeechRecognitionTemplateId string
- ID of the resource.
- Name string
- The template name only supports Chinese, English, numbers, _, - and *.
- SpeechRecognition CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs
- audio configuration.
- bucket String
- bucket name.
- ciMediaSpeechRecognitionTemplateId String
- ID of the resource.
- name String
- The template name only supports Chinese, English, numbers, _, - and *.
- speechRecognition CiMediaSpeechRecognitionTemplateSpeechRecognition
- audio configuration.
- bucket string
- bucket name.
- ciMediaSpeechRecognitionTemplateId string
- ID of the resource.
- name string
- The template name only supports Chinese, English, numbers, _, - and *.
- speechRecognition CiMediaSpeechRecognitionTemplateSpeechRecognition
- audio configuration.
- bucket str
- bucket name.
- ci_media_speech_recognition_template_id str
- ID of the resource.
- name str
- The template name only supports Chinese, English, numbers, _, - and *.
- speech_recognition CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs
- audio configuration.
- bucket String
- bucket name.
- ciMediaSpeechRecognitionTemplateId String
- ID of the resource.
- name String
- The template name only supports Chinese, English, numbers, _, - and *.
- speechRecognition Property Map
- audio configuration.
Supporting Types
CiMediaSpeechRecognitionTemplateSpeechRecognition, CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs
- ChannelNum string
- Number of voice channels: 1 means mono (non-telephone engine model types support mono only); 2 means dual channel (only the 8k_zh engine model supports dual channel, which should correspond to the two sides of the call).
- EngineModelType string
- Engine model type, divided into telephone and non-telephone scenarios. Telephone scenarios: 8k_zh: 8k telephone Mandarin Chinese general (can be used for dual-channel audio); 8k_zh_s: 8k telephone Mandarin Chinese speaker separation (mono audio only); 8k_en: 8k telephone English. Non-telephone scenarios: 16k_zh: 16k Mandarin Chinese; 16k_zh_video: 16k audio and video field; 16k_en: 16k English; 16k_ca: 16k Cantonese; 16k_ja: 16k Japanese; 16k_zh_edu: Chinese education; 16k_en_edu: English education; 16k_zh_medical: medical; 16k_th: Thai; 16k_zh_dialect: multi-dialect (supports 23 dialects).
- ConvertNumMode string
- Whether to intelligently convert Chinese numbers to Arabic numerals (currently supported by the Mandarin Chinese engines): 0 means no conversion (Chinese numbers are output directly); 1 means intelligent conversion to Arabic numerals according to the scene; 3 means math-related number conversion is enabled. The default value is 0.
- FilterDirty string
- Whether to filter profanity (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means filter; 2 means replace profanity with *. The default value is 0.
- FilterModal string
- Whether to filter modal particles (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means partial filtering; 2 means strict filtering. The default value is 0.
- FilterPunc string
- Whether to filter punctuation (currently supported by the Mandarin Chinese engines): 0 means no filtering; 1 means filter end-of-sentence punctuation; 2 means filter all punctuation. The default value is 0.
- OutputFileType string
- Output file type; valid values are txt and srt. The default is txt.
- ResTextFormat string
- Form in which the recognition result is returned: 0 means recognition result text with segment-level timestamps; 1 means word-level detailed recognition results without punctuation, including speech rate values (a list of word-level timestamps, typically used for subtitle generation); 2 means word-level detailed recognition results including punctuation and speech rate values.
- SpeakerDiarization string
- Whether to enable speaker separation: 0 means disabled; 1 means enabled (only supported for 8k_zh, 16k_zh and 16k_zh_video with mono audio). The default value is 0. Note: for 8k telephone scenarios it is recommended to distinguish the two parties with dual channels; setting ChannelNum=2 is sufficient, and speaker separation does not need to be enabled.
- SpeakerNumber string
- Number of speakers to separate (must be used together with speaker separation enabled). Value range: 0-10, where 0 means automatic separation (currently supports up to 6 speakers) and 1-10 specifies the number of speakers to separate. The default value is 0.
- ChannelNum string
- Number of voice channels: 1 means mono (non-telephone engine model types support mono only); 2 means dual channel (only the 8k_zh engine model supports dual channel, which should correspond to the two sides of the call).
- EngineModelType string
- Engine model type, divided into telephone and non-telephone scenarios. Telephone scenarios: 8k_zh: 8k telephone Mandarin Chinese general (can be used for dual-channel audio); 8k_zh_s: 8k telephone Mandarin Chinese speaker separation (mono audio only); 8k_en: 8k telephone English. Non-telephone scenarios: 16k_zh: 16k Mandarin Chinese; 16k_zh_video: 16k audio and video field; 16k_en: 16k English; 16k_ca: 16k Cantonese; 16k_ja: 16k Japanese; 16k_zh_edu: Chinese education; 16k_en_edu: English education; 16k_zh_medical: medical; 16k_th: Thai; 16k_zh_dialect: multi-dialect (supports 23 dialects).
- ConvertNumMode string
- Whether to intelligently convert Chinese numbers to Arabic numerals (currently supported by the Mandarin Chinese engines): 0 means no conversion (Chinese numbers are output directly); 1 means intelligent conversion to Arabic numerals according to the scene; 3 means math-related number conversion is enabled. The default value is 0.
- FilterDirty string
- Whether to filter profanity (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means filter; 2 means replace profanity with *. The default value is 0.
- FilterModal string
- Whether to filter modal particles (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means partial filtering; 2 means strict filtering. The default value is 0.
- FilterPunc string
- Whether to filter punctuation (currently supported by the Mandarin Chinese engines): 0 means no filtering; 1 means filter end-of-sentence punctuation; 2 means filter all punctuation. The default value is 0.
- OutputFileType string
- Output file type; valid values are txt and srt. The default is txt.
- ResTextFormat string
- Form in which the recognition result is returned: 0 means recognition result text with segment-level timestamps; 1 means word-level detailed recognition results without punctuation, including speech rate values (a list of word-level timestamps, typically used for subtitle generation); 2 means word-level detailed recognition results including punctuation and speech rate values.
- SpeakerDiarization string
- Whether to enable speaker separation: 0 means disabled; 1 means enabled (only supported for 8k_zh, 16k_zh and 16k_zh_video with mono audio). The default value is 0. Note: for 8k telephone scenarios it is recommended to distinguish the two parties with dual channels; setting ChannelNum=2 is sufficient, and speaker separation does not need to be enabled.
- SpeakerNumber string
- Number of speakers to separate (must be used together with speaker separation enabled). Value range: 0-10, where 0 means automatic separation (currently supports up to 6 speakers) and 1-10 specifies the number of speakers to separate. The default value is 0.
- channelNum String
- Number of voice channels: 1 means mono (non-telephone engine model types support mono only); 2 means dual channel (only the 8k_zh engine model supports dual channel, which should correspond to the two sides of the call).
- engineModelType String
- Engine model type, divided into telephone and non-telephone scenarios. Telephone scenarios: 8k_zh: 8k telephone Mandarin Chinese general (can be used for dual-channel audio); 8k_zh_s: 8k telephone Mandarin Chinese speaker separation (mono audio only); 8k_en: 8k telephone English. Non-telephone scenarios: 16k_zh: 16k Mandarin Chinese; 16k_zh_video: 16k audio and video field; 16k_en: 16k English; 16k_ca: 16k Cantonese; 16k_ja: 16k Japanese; 16k_zh_edu: Chinese education; 16k_en_edu: English education; 16k_zh_medical: medical; 16k_th: Thai; 16k_zh_dialect: multi-dialect (supports 23 dialects).
- convertNumMode String
- Whether to intelligently convert Chinese numbers to Arabic numerals (currently supported by the Mandarin Chinese engines): 0 means no conversion (Chinese numbers are output directly); 1 means intelligent conversion to Arabic numerals according to the scene; 3 means math-related number conversion is enabled. The default value is 0.
- filterDirty String
- Whether to filter profanity (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means filter; 2 means replace profanity with *. The default value is 0.
- filterModal String
- Whether to filter modal particles (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means partial filtering; 2 means strict filtering. The default value is 0.
- filterPunc String
- Whether to filter punctuation (currently supported by the Mandarin Chinese engines): 0 means no filtering; 1 means filter end-of-sentence punctuation; 2 means filter all punctuation. The default value is 0.
- outputFileType String
- Output file type; valid values are txt and srt. The default is txt.
- resTextFormat String
- Form in which the recognition result is returned: 0 means recognition result text with segment-level timestamps; 1 means word-level detailed recognition results without punctuation, including speech rate values (a list of word-level timestamps, typically used for subtitle generation); 2 means word-level detailed recognition results including punctuation and speech rate values.
- speakerDiarization String
- Whether to enable speaker separation: 0 means disabled; 1 means enabled (only supported for 8k_zh, 16k_zh and 16k_zh_video with mono audio). The default value is 0. Note: for 8k telephone scenarios it is recommended to distinguish the two parties with dual channels; setting ChannelNum=2 is sufficient, and speaker separation does not need to be enabled.
- speakerNumber String
- Number of speakers to separate (must be used together with speaker separation enabled). Value range: 0-10, where 0 means automatic separation (currently supports up to 6 speakers) and 1-10 specifies the number of speakers to separate. The default value is 0.
- channelNum string
- Number of voice channels: 1 means mono (non-telephone engine model types support mono only); 2 means dual channel (only the 8k_zh engine model supports dual channel, which should correspond to the two sides of the call).
- engineModelType string
- Engine model type, divided into telephone and non-telephone scenarios. Telephone scenarios: 8k_zh: 8k telephone Mandarin Chinese general (can be used for dual-channel audio); 8k_zh_s: 8k telephone Mandarin Chinese speaker separation (mono audio only); 8k_en: 8k telephone English. Non-telephone scenarios: 16k_zh: 16k Mandarin Chinese; 16k_zh_video: 16k audio and video field; 16k_en: 16k English; 16k_ca: 16k Cantonese; 16k_ja: 16k Japanese; 16k_zh_edu: Chinese education; 16k_en_edu: English education; 16k_zh_medical: medical; 16k_th: Thai; 16k_zh_dialect: multi-dialect (supports 23 dialects).
- convertNumMode string
- Whether to intelligently convert Chinese numbers to Arabic numerals (currently supported by the Mandarin Chinese engines): 0 means no conversion (Chinese numbers are output directly); 1 means intelligent conversion to Arabic numerals according to the scene; 3 means math-related number conversion is enabled. The default value is 0.
- filterDirty string
- Whether to filter profanity (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means filter; 2 means replace profanity with *. The default value is 0.
- filterModal string
- Whether to filter modal particles (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means partial filtering; 2 means strict filtering. The default value is 0.
- filterPunc string
- Whether to filter punctuation (currently supported by the Mandarin Chinese engines): 0 means no filtering; 1 means filter end-of-sentence punctuation; 2 means filter all punctuation. The default value is 0.
- outputFileType string
- Output file type; valid values are txt and srt. The default is txt.
- resTextFormat string
- Form in which the recognition result is returned: 0 means recognition result text with segment-level timestamps; 1 means word-level detailed recognition results without punctuation, including speech rate values (a list of word-level timestamps, typically used for subtitle generation); 2 means word-level detailed recognition results including punctuation and speech rate values.
- speakerDiarization string
- Whether to enable speaker separation: 0 means disabled; 1 means enabled (only supported for 8k_zh, 16k_zh and 16k_zh_video with mono audio). The default value is 0. Note: for 8k telephone scenarios it is recommended to distinguish the two parties with dual channels; setting ChannelNum=2 is sufficient, and speaker separation does not need to be enabled.
- speakerNumber string
- Number of speakers to separate (must be used together with speaker separation enabled). Value range: 0-10, where 0 means automatic separation (currently supports up to 6 speakers) and 1-10 specifies the number of speakers to separate. The default value is 0.
- channel_num str
- Number of voice channels: 1 means mono (non-telephone engine model types support mono only); 2 means dual channel (only the 8k_zh engine model supports dual channel, which should correspond to the two sides of the call).
- engine_model_type str
- Engine model type, divided into telephone and non-telephone scenarios. Telephone scenarios: 8k_zh: 8k telephone Mandarin Chinese general (can be used for dual-channel audio); 8k_zh_s: 8k telephone Mandarin Chinese speaker separation (mono audio only); 8k_en: 8k telephone English. Non-telephone scenarios: 16k_zh: 16k Mandarin Chinese; 16k_zh_video: 16k audio and video field; 16k_en: 16k English; 16k_ca: 16k Cantonese; 16k_ja: 16k Japanese; 16k_zh_edu: Chinese education; 16k_en_edu: English education; 16k_zh_medical: medical; 16k_th: Thai; 16k_zh_dialect: multi-dialect (supports 23 dialects).
- convert_num_mode str
- Whether to intelligently convert Chinese numbers to Arabic numerals (currently supported by the Mandarin Chinese engines): 0 means no conversion (Chinese numbers are output directly); 1 means intelligent conversion to Arabic numerals according to the scene; 3 means math-related number conversion is enabled. The default value is 0.
- filter_dirty str
- Whether to filter profanity (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means filter; 2 means replace profanity with *. The default value is 0.
- filter_modal str
- Whether to filter modal particles (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means partial filtering; 2 means strict filtering. The default value is 0.
- filter_punc str
- Whether to filter punctuation (currently supported by the Mandarin Chinese engines): 0 means no filtering; 1 means filter end-of-sentence punctuation; 2 means filter all punctuation. The default value is 0.
- output_file_type str
- Output file type; valid values are txt and srt. The default is txt.
- res_text_format str
- Form in which the recognition result is returned: 0 means recognition result text with segment-level timestamps; 1 means word-level detailed recognition results without punctuation, including speech rate values (a list of word-level timestamps, typically used for subtitle generation); 2 means word-level detailed recognition results including punctuation and speech rate values.
- speaker_diarization str
- Whether to enable speaker separation: 0 means disabled; 1 means enabled (only supported for 8k_zh, 16k_zh and 16k_zh_video with mono audio). The default value is 0. Note: for 8k telephone scenarios it is recommended to distinguish the two parties with dual channels; setting ChannelNum=2 is sufficient, and speaker separation does not need to be enabled.
- speaker_number str
- Number of speakers to separate (must be used together with speaker separation enabled). Value range: 0-10, where 0 means automatic separation (currently supports up to 6 speakers) and 1-10 specifies the number of speakers to separate. The default value is 0.
- channelNum String
- Number of voice channels: 1 means mono (non-telephone engine model types support mono only); 2 means dual channel (only the 8k_zh engine model supports dual channel, which should correspond to the two sides of the call).
- engineModelType String
- Engine model type, divided into telephone and non-telephone scenarios. Telephone scenarios: 8k_zh: 8k telephone Mandarin Chinese general (can be used for dual-channel audio); 8k_zh_s: 8k telephone Mandarin Chinese speaker separation (mono audio only); 8k_en: 8k telephone English. Non-telephone scenarios: 16k_zh: 16k Mandarin Chinese; 16k_zh_video: 16k audio and video field; 16k_en: 16k English; 16k_ca: 16k Cantonese; 16k_ja: 16k Japanese; 16k_zh_edu: Chinese education; 16k_en_edu: English education; 16k_zh_medical: medical; 16k_th: Thai; 16k_zh_dialect: multi-dialect (supports 23 dialects).
- convertNumMode String
- Whether to intelligently convert Chinese numbers to Arabic numerals (currently supported by the Mandarin Chinese engines): 0 means no conversion (Chinese numbers are output directly); 1 means intelligent conversion to Arabic numerals according to the scene; 3 means math-related number conversion is enabled. The default value is 0.
- filterDirty String
- Whether to filter profanity (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means filter; 2 means replace profanity with *. The default value is 0.
- filterModal String
- Whether to filter modal particles (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means partial filtering; 2 means strict filtering. The default value is 0.
- filterPunc String
- Whether to filter punctuation (currently supported by the Mandarin Chinese engines): 0 means no filtering; 1 means filter end-of-sentence punctuation; 2 means filter all punctuation. The default value is 0.
- outputFileType String
- Output file type; valid values are txt and srt. The default is txt.
- resTextFormat String
- Form in which the recognition result is returned: 0 means recognition result text with segment-level timestamps; 1 means word-level detailed recognition results without punctuation, including speech rate values (a list of word-level timestamps, typically used for subtitle generation); 2 means word-level detailed recognition results including punctuation and speech rate values.
- speakerDiarization String
- Whether to enable speaker separation: 0 means disabled; 1 means enabled (only supported for 8k_zh, 16k_zh and 16k_zh_video with mono audio). The default value is 0. Note: for 8k telephone scenarios it is recommended to distinguish the two parties with dual channels; setting ChannelNum=2 is sufficient, and speaker separation does not need to be enabled.
- speakerNumber String
- Number of speakers to separate (must be used together with speaker separation enabled). Value range: 0-10, where 0 means automatic separation (currently supports up to 6 speakers) and 1-10 specifies the number of speakers to separate. The default value is 0.
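These options interact: speaker separation applies only to mono audio on the 8k_zh, 16k_zh and 16k_zh_video engines, while 8k telephone audio should use two channels instead. A minimal sketch of a template that enables speaker separation (the resource name and bucket are placeholders reused from Example Usage):
import pulumi_tencentcloud as tencentcloud
# Hypothetical template with speaker separation on a mono 16k Mandarin engine;
# for 8k telephone audio, set channel_num="2" instead of enabling diarization.
diarized = tencentcloud.CiMediaSpeechRecognitionTemplate("diarizedTemplate",
    bucket="terraform-ci-1308919341",
    speech_recognition={
        "channel_num": "1",
        "engine_model_type": "16k_zh",
        "res_text_format": "2",
        "output_file_type": "srt",
        "speaker_diarization": "1",
        "speaker_number": "0",  # automatic separation, currently up to 6 speakers
    })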
Import
ci media_speech_recognition_template can be imported using the bucket#templateId, e.g.
$ pulumi import tencentcloud:index/ciMediaSpeechRecognitionTemplate:CiMediaSpeechRecognitionTemplate media_speech_recognition_template terraform-ci-xxxxxx#t1d794430f2f1f4350b11e905ce2c6167e
To learn more about importing existing cloud resources, see Importing resources.
Package Details
- Repository
- tencentcloud tencentcloudstack/terraform-provider-tencentcloud
- License
- Notes
- This Pulumi package is based on the tencentcloud Terraform Provider.