1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
|
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <sysexits.h>
#include <fcntl.h>
#include <math.h>
#include <xas/spatial.h>
#include <xas/synth.h>
#include <xas/vox.h>
#include <xas/audio.h>
#include <xas/riff.h>
/*
 * Report a command-line usage error and terminate the process.
 *
 * When `message` is non-NULL it is treated as a printf-style format string,
 * expanded with the trailing variadic arguments, and written to stderr
 * followed by a newline.  The usage synopsis (built from argv[0]) is always
 * written afterwards.  The function never returns: it exits with EX_USAGE.
 *
 * `argc` is accepted for signature symmetry with main() but is not consulted.
 */
static void usage(int argc, char **argv, const char *message, ...) {
    (void)argc;

    if (message != NULL) {
        va_list ap;

        va_start(ap, message);
        vfprintf(stderr, message, ap);
        va_end(ap);

        fputs("\n", stderr);
    }

    fprintf(stderr, "usage: %s output.wav\n", argv[0]);

    exit(EX_USAGE);
}
/*
 * Render a small spatial-audio demo into the WAV file named by argv[1].
 *
 * Two sound sources are placed in a spatial scene whose two speakers sit at
 * -0.09 and +0.09 on the first coordinate axis:
 *   - a sine synth (frequency set to 2600) at +5.2 on the first axis, and
 *   - a text2wave-backed vox speaking three sentences at -5.2 on that axis.
 * The mixed scene output is then pulled buffer by buffer and written to the
 * RIFF/WAV stream.
 *
 * Returns EX_OK on success and EX_OSERR if any resource could not be created
 * or if reading/writing audio failed.  A wrong argument count is reported via
 * usage(), which exits with EX_USAGE.
 *
 * All resources are released through a single teardown chain shared by the
 * success and failure paths, so the two cannot drift apart (previously the
 * success path forgot to destroy the synth).
 */
int main(int argc, char **argv) {
    /* Pessimistic default; flipped to EX_OK only after everything succeeded. */
    int status = EX_OSERR;

    xas_spatial_scene *scene;
    xas_synth *sine;

    xas_audio_stream *synth,
                     *voice,
                     *output,
                     *wave;

    xas_vox *vox;

    xas_audio_format format = {
        .channels    = XAS_AUDIO_STEREO,
        .sample_size = XAS_AUDIO_PCM_16_BIT,
        .sample_rate = 44100
    };

    size_t buffer_size = 735,
           duration_s  = 300,
           i;

    xas_spatial_coord speakers[2] = {
        { -0.09, 0.0, 0.0 },
        {  0.09, 0.0, 0.0 }
    };

    if (argc != 2) {
        usage(argc, argv, "No output file provided");
    }

    /*
     * Acquire resources in dependency order.  Each failure jumps to the
     * teardown label that releases exactly what has been acquired so far.
     */
    if ((wave = xas_riff_new_file(argv[1],
                                  format,
                                  O_WRONLY | O_CREAT | O_TRUNC)) == NULL) {
        goto done;
    }

    if ((sine = xas_synth_new(format,
                              buffer_size,
                              XAS_SYNTH_SINE)) == NULL) {
        goto cleanup_wave;
    }

    if ((synth = xas_synth_stream_new(sine)) == NULL) {
        goto cleanup_sine;
    }

    if ((vox = xas_vox_new(format,
                           buffer_size,
                           "/usr/bin/text2wave")) == NULL) {
        goto cleanup_synth;
    }

    xas_vox_set_parameter_float(vox, "Duration_Stretch", 1.3);

    if ((voice = xas_vox_stream_new(vox)) == NULL) {
        goto cleanup_vox;
    }

    if ((scene = xas_spatial_scene_new(format,
                                       speakers[0],
                                       speakers[1])) == NULL) {
        goto cleanup_voice;
    }

    if ((output = xas_spatial_scene_stream_new(scene,
                                               buffer_size)) == NULL) {
        goto cleanup_scene;
    }

    /* Sine source on the positive side of the first axis. */
    if (xas_spatial_scene_add_object(scene,
                                     (xas_spatial_coord){ 5.2, 0.0, 0.0 },
                                     synth,
                                     sine) == NULL) {
        goto cleanup_output;
    }

    /* Speech source mirrored on the negative side. */
    if (xas_spatial_scene_add_object(scene,
                                     (xas_spatial_coord){ -5.2, 0.0, 0.0 },
                                     voice,
                                     vox) == NULL) {
        goto cleanup_output;
    }

    xas_synth_set_frequency(sine, 2600);
    xas_vox_set_voice(vox, "voice_cmu_us_slt_cg");

    xas_synth_start(sine);

    xas_vox_sayf(vox, "I want to eat your soul.\n");
    xas_vox_sayf(vox, "You don't understand.\n");
    xas_vox_sayf(vox, "I really want to eat your soul.\n");
    xas_vox_generate(vox);

    /*
     * NOTE(review): despite its name, duration_s is used here as a count of
     * buffers, not seconds.  If buffer_size is in sample frames, 735 frames
     * at 44100 Hz is 1/60 s, so 300 iterations would yield about 5 s of
     * audio.  Left unchanged; confirm the intended duration.
     */
    for (i = 0; i < duration_s; i++) {
        void *buf;
        ssize_t readlen;

        if ((readlen = xas_audio_stream_read(output,
                                             &buf,
                                             buffer_size)) < 0) {
            goto cleanup_output;
        }

        if (xas_audio_stream_write(wave, buf, readlen) < 0) {
            goto cleanup_output;
        }
    }

    xas_audio_stream_flush(wave);

    status = EX_OK;

    /* Teardown: reverse order of acquisition; success falls through here. */
cleanup_output:
    xas_audio_stream_destroy(output);

cleanup_scene:
    xas_spatial_scene_destroy(scene);

cleanup_voice:
    xas_audio_stream_destroy(voice);

cleanup_vox:
    xas_vox_destroy(vox);

cleanup_synth:
    xas_audio_stream_destroy(synth);

cleanup_sine:
    /* Previously skipped on the success path, leaking the synth. */
    xas_synth_destroy(sine);

cleanup_wave:
    xas_audio_stream_destroy(wave);

done:
    return status;
}
|