This patch modifies src/fann_train_data.c and src/include/fann_train.h.
It adds two functions: fann_merge_array_train_data and fann_dump_train_data.
The story behind them is that I'm using fann in some speech classification experiments. My data is all in phonetic transcriptions and feature tracks. The program I wrote has a layer to translate these into fann_train_data structures. I needed a way to merge them all into a single training set.
The first option I tried was to use fann_merge_train_data for each new fann_train_data structure produced from an utterance. However, with the number of training points I have, about 50,000 training tokens in approximately 5,000 utterances, this was too slow.
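The second option is what is in the patch. It takes an array of fann_train_data structures and merges them recursively. It follows the divide-and-conquer shape of a mergesort: the array is split in half, each half is merged recursively, and the two results are then merged. The number of calls to fann_merge_train_data stays about the same, but most of them now operate on small sets instead of re-copying one ever-growing set.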
The second function, fann_dump_train_data, is simply a debugging function to check that features are being translated correctly.
The patch was generated against the gsoc2007 branch of fann.
Code:
Index: src/fann_train_data.c
===================================================================
RCS file: /cvsroot/fann/fann/src/fann_train_data.c,v
retrieving revision 1.34.4.7
diff -u -p -r1.34.4.7 fann_train_data.c
--- src/fann_train_data.c 31 Aug 2008 11:16:24 -0000 1.34.4.7
+++ src/fann_train_data.c 7 Oct 2008 20:57:42 -0000
@@ -610,6 +610,86 @@ FANN_EXTERNAL struct fann_train_data *FA
return dest;
}
+/* Function: fann_merge_array_train_data
+
+	Merges the *num_data* training sets in the array *data* into one new
+	<struct fann_train_data>. The array is halved recursively so that
+	<fann_merge_train_data> combines balanced pairs rather than appending
+	one set at a time to an ever-growing result.
+
+	NULL entries in *data* are skipped, and the caller's sets are never
+	freed here. A half that produces NULL (only NULL entries, or a failed
+	duplicate or merge further down) is treated as empty.
+
+	Returns NULL if *data* is NULL, if *num_data* is not positive, or if no
+	merged set could be produced; otherwise the caller owns the returned set
+	and releases it with <fann_destroy_train>.
+ */
+FANN_EXTERNAL struct fann_train_data* FANN_API fann_merge_array_train_data(struct fann_train_data **data, int num_data)
+{
+	int num_d1, num_d2;
+	struct fann_train_data *d1m = NULL;
+	struct fann_train_data *d2m = NULL;
+	struct fann_train_data *dm = NULL;
+
+	/* Reject a missing array and non-positive counts up front; a negative
+	   count would otherwise recurse onto elements outside the array. */
+	if (data == NULL || num_data <= 0) {
+		return NULL;
+	}
+
+	if (num_data == 1) {
+		/* Duplicate the leaf so that every level of the recursion owns what
+		   it receives and can free it once it has been merged. */
+		return data[0] ? fann_duplicate_train_data(data[0]) : NULL;
+	}
+
+	/* The first half takes the extra element when num_data is odd. */
+	num_d2 = num_data / 2;
+	num_d1 = num_data - num_d2;
+
+	d1m = fann_merge_array_train_data(data, num_d1);
+	d2m = fann_merge_array_train_data(data + num_d1, num_d2);
+
+	if (d1m == NULL) {
+		return d2m;
+	}
+	if (d2m == NULL) {
+		return d1m;
+	}
+
+	dm = fann_merge_train_data(d1m, d2m);
+	/* Both intermediates were allocated by the recursion; release them. */
+	fann_destroy_train(d1m);
+	fann_destroy_train(d2m);
+	return dm;
+}
+
+/* Function: fann_dump_train_data
+
+	Debugging aid that prints every training pair of *data* to *f*: a "Num <index>" line, then an "i:" line of inputs and an "o:" line of outputs.
+	If *f* is NULL, stderr is used. If *data* is NULL, nothing is printed.
+ */
+FANN_EXTERNAL void FANN_API fann_dump_train_data(struct fann_train_data *data, FILE *f)
+{
+	FILE *out = (f != NULL) ? f : stderr;
+	unsigned int i, j;
+	if (data == NULL) {
+		return;
+	}
+	for (i = 0; i < data->num_data; i++) {
+		fprintf(out, "Num %u\n\ti: ", i);
+		for (j = 0; j < data->num_input; j++) {
+			fprintf(out, "%f ", (double) data->input[i][j]); /* cast: fann_type may be float, double or int depending on the build */
+		}
+		fprintf(out, "\n\to: ");
+		for (j = 0; j < data->num_output; j++) {
+			fprintf(out, "%f ", (double) data->output[i][j]);
+		}
+		fprintf(out, "\n");
+	}
+}
+
FANN_EXTERNAL struct fann_train_data *FANN_API fann_subset_train_data(struct fann_train_data
*data, unsigned int pos,
unsigned int length)
Index: src/include/fann_train.h
===================================================================
RCS file: /cvsroot/fann/fann/src/include/fann_train.h,v
retrieving revision 1.26.4.2
diff -u -p -r1.26.4.2 fann_train.h
--- src/include/fann_train.h 31 Aug 2008 11:16:25 -0000 1.26.4.2
+++ src/include/fann_train.h 7 Oct 2008 20:57:43 -0000
@@ -585,6 +585,17 @@ FANN_EXTERNAL struct fann_train_data *FA
*/
FANN_EXTERNAL struct fann_train_data *FANN_API fann_duplicate_train_data(struct fann_train_data
*data);
+/* Function: fann_merge_array_train_data
+
+	Returns a new <struct fann_train_data> that is the merge of the *num_data* training sets in the array *data*. NULL entries are skipped; NULL is returned when there is nothing to merge.
+ */
+FANN_EXTERNAL struct fann_train_data* FANN_API fann_merge_array_train_data(struct fann_train_data **data, int num_data);
+
+/* Function: fann_dump_train_data
+
+	Debugging aid: prints the inputs and outputs of every training pair in *data* to the file *f*. If *f* is NULL, stderr is used.
+ */
+FANN_EXTERNAL void FANN_API fann_dump_train_data(struct fann_train_data *data, FILE *f);
/* Function: fann_subset_train_data