Orbiter 2022
Combinatorial Objects
spreadsheet.cpp
Go to the documentation of this file.
1// spreadsheet.cpp
2//
3// Anton Betten
4// July 18, 2012
5//
6// moved to GALOIS: March 15, 2013
7//
8
9#include "foundations.h"
10
11using namespace std;
12
13
14namespace orbiter {
15namespace layer1_foundations {
16namespace data_structures {
17
18
20{
21 null();
22}
23
25{
26 freeself();
27}
28
30{
31 tokens = NULL;
32 line_start = NULL;
33 line_size = NULL;
34 Table = NULL;
35}
36
38{
39 int i;
40
41 if (tokens) {
42 for (i = 0; i < nb_tokens; i++) {
43 FREE_char(tokens[i]);
44 }
46 }
47 if (line_start) {
49 }
50 if (line_size) {
52 }
53 if (Table) {
55 }
56 null();
57}
58
59void spreadsheet::init_set_of_sets(set_of_sets *S, int f_make_heading)
60{
61 int s, i, j, a, h, len, offset = 0;
62 char str[1000];
63
64 s = S->largest_set_size();
65 if (f_make_heading) {
67 offset = 1;
68 }
69 else {
71 offset = 0;
72 }
75 for (i = 0; i < nb_rows * nb_cols; i++) {
76 Table[i] = -1;
77 }
80
81 h = 0;
82 if (f_make_heading) {
83 for (j = 0; j < s + 1; j++) {
84 snprintf(str, 1000, "C%d", j);
85 len = strlen(str);
86 tokens[h] = NEW_char(len + 1);
87 strcpy(tokens[h], str);
88 Table[0 * nb_cols + j] = h;
89 h++;
90 }
91 }
92 for (i = 0; i < S->nb_sets; i++) {
93
94 snprintf(str, 1000, "%ld", S->Set_size[i]);
95 len = strlen(str);
96 tokens[h] = NEW_char(len + 1);
97 strcpy(tokens[h], str);
98 Table[(i + offset) * nb_cols + 0] = h;
99 h++;
100
101 for (j = 0; j < S->Set_size[i]; j++) {
102 a = S->Sets[i][j];
103
104 snprintf(str, 1000, "%d", a);
105 len = strlen(str);
106 tokens[h] = NEW_char(len + 1);
107 strcpy(tokens[h], str);
108 Table[(i + offset) * nb_cols + 1 + j] = h;
109 h++;
110
111 }
112 }
113}
114
115void spreadsheet::init_int_matrix(int nb_rows, int nb_cols, int *A)
116{
117 int i, len, a;
118 char str[1000];
119
123 for (i = 0; i < nb_rows * nb_cols; i++) {
124 Table[i] = -1;
125 }
128 for (i = 0; i < nb_tokens; i++) {
129 a = A[i];
130 snprintf(str, 1000, "%d", a);
131 len = strlen(str);
132 tokens[i] = NEW_char(len + 1);
133 strcpy(tokens[i], str);
134 Table[i] = i;
135 }
136}
137
138void spreadsheet::init_empty_table(int nb_rows, int nb_cols)
139{
140 int i;
141
145 for (i = 0; i < nb_rows * nb_cols; i++) {
146 Table[i] = i;
147 }
150 for (i = 0; i < nb_tokens; i++) {
151 tokens[i] = NULL;
152 }
153}
154
156 int col_idx, const char *text)
157{
158 int l, t;
159
160 t = Table[row_idx * nb_cols + col_idx];
161 if (tokens[t]) {
162 //cout << "fill_column_with_text before FREE_char i="
163 //<< i << " col_idx=" << col_idx << " t=" << t << endl;
164 FREE_char(tokens[t]);
165 }
166 l = strlen(text);
167 tokens[t] = NEW_char(l + 1);
168 strcpy(tokens[t], text);
169}
170
172 int col_idx, std::string &text)
173{
174 int l, t;
175
176 t = Table[row_idx * nb_cols + col_idx];
177 if (tokens[t]) {
178 //cout << "fill_column_with_text before FREE_char i="
179 //<< i << " col_idx=" << col_idx << " t=" << t << endl;
180 FREE_char(tokens[t]);
181 }
182 l = text.size();
183 tokens[t] = NEW_char(l + 1);
184 strcpy(tokens[t], text.c_str());
185}
186
188 int col_idx, long int val)
189{
190 int l, t;
191 char str[1000];
192
193 sprintf(str, "%ld", val);
194
195 t = Table[row_idx * nb_cols + col_idx];
196 if (tokens[t]) {
197 //cout << "fill_column_with_text before FREE_char i="
198 //<< i << " col_idx=" << col_idx << " t=" << t << endl;
199 FREE_char(tokens[t]);
200 }
201 l = strlen(str);
202 tokens[t] = NEW_char(l + 1);
203 strcpy(tokens[t], str);
204}
205
206
208 const char **text, const char *heading)
209{
210 int i, l, t;
211
212 for (i = 0; i < nb_rows; i++) {
213 t = Table[i * nb_cols + col_idx];
214 if (tokens[t]) {
215 //cout << "fill_column_with_text before FREE_char i="
216 //<< i << " col_idx=" << col_idx << " t=" << t << endl;
217 FREE_char(tokens[t]);
218 }
219 if (i == 0) {
220 l = strlen(heading);
221 tokens[t] = NEW_char(l + 1);
222 strcpy(tokens[t], heading);
223 }
224 else {
225 l = strlen(text[i - 1]);
226 tokens[t] = NEW_char(l + 1);
227 strcpy(tokens[t], text[i - 1]);
228 }
229 }
230}
231
233 int *data, const char *heading)
234{
235 int i, l, t;
236 char str[1000];
237
238 for (i = 0; i < nb_rows; i++) {
239 t = Table[i * nb_cols + col_idx];
240 if (tokens[t]) {
241 //cout << "fill_column_with_int before FREE_char i=" << i
242 //<< " col_idx=" << col_idx << " t=" << t << endl;
243 FREE_char(tokens[t]);
244 }
245 if (i == 0) {
246 l = strlen(heading);
247 tokens[t] = NEW_char(l + 1);
248 strcpy(tokens[t], heading);
249 }
250 else {
251 snprintf(str, 1000, "%d", data[i - 1]);
252 l = strlen(str);
253 tokens[t] = NEW_char(l + 1);
254 strcpy(tokens[t], str);
255 }
256 }
257}
258
260 long int *data, const char *heading)
261{
262 int i, l, t;
263 char str[1000];
264
265 for (i = 0; i < nb_rows; i++) {
266 t = Table[i * nb_cols + col_idx];
267 if (tokens[t]) {
268 //cout << "fill_column_with_int before FREE_char i=" << i
269 //<< " col_idx=" << col_idx << " t=" << t << endl;
270 FREE_char(tokens[t]);
271 }
272 if (i == 0) {
273 l = strlen(heading);
274 tokens[t] = NEW_char(l + 1);
275 strcpy(tokens[t], heading);
276 }
277 else {
278 sprintf(str, "%ld", data[i - 1]);
279 l = strlen(str);
280 tokens[t] = NEW_char(l + 1);
281 strcpy(tokens[t], str);
282 }
283 }
284}
285
287 int col_idx, const char *heading)
288{
289 int i, l, t;
290 char str[1000];
291
292 for (i = 0; i < nb_rows; i++) {
293 t = Table[i * nb_cols + col_idx];
294 if (tokens[t]) {
295 //cout << "fill_column_with_row_index before FREE_char i="
296 //<< i << " col_idx=" << col_idx << " t=" << t << endl;
297 FREE_char(tokens[t]);
298 }
299 if (i == 0) {
300 l = strlen(heading);
301 tokens[t] = NEW_char(l + 1);
302 strcpy(tokens[t], heading);
303 }
304 else {
305 snprintf(str, 1000, "%d", i - 1);
306 l = strlen(str);
307 tokens[t] = NEW_char(l + 1);
308 strcpy(tokens[t], str);
309 }
310 }
311}
312
313void spreadsheet::add_token(const char *label)
314{
315 char **tokens2;
316 int i, j, len;
317
318 tokens2 = NEW_pchar(nb_tokens + 1);
319 for (i = 0; i < nb_tokens; i++) {
320 tokens2[i] = tokens[i];
321 }
322 len = strlen(label);
323 tokens2[nb_tokens] = NEW_char(len + 1);
324 for (i = 0, j = 0; i < len; i++) {
325 if ((int)label[i] < 0) {
326 cout << "spreadsheet::add_token negative character "
327 << (int) label[i] << endl;
328 }
329 else {
330 tokens2[nb_tokens][j++] = label[i];
331 }
332 }
333 tokens2[nb_tokens][j++] = 0;
334 //strcpy(tokens2[nb_tokens], label);
336 tokens = tokens2;
337 nb_tokens++;
338}
339
340void spreadsheet::save(std::string &fname, int verbose_level)
341{
342 int f_v = (verbose_level >= 1);
344
345
346 {
347 ofstream f(fname);
348 print_table(f, FALSE);
349 f << "END" << endl;
350 }
351 if (f_v) {
352 cout << "Written file " << fname << " of size "
353 << Fio.file_size(fname) << endl;
354 }
355}
356
357void spreadsheet::read_spreadsheet(std::string &fname, int verbose_level)
358{
359 int f_v = (verbose_level >= 1);
360 int f_vv = FALSE; //(verbose_level >= 2);
361 int i;
363
364 if (f_v) {
365 cout << "spreadsheet::read_spreadsheet reading file "
366 << fname << " of size " << Fio.file_size(fname) << endl;
367 }
368
369
370
371 if (f_v) {
372 cout << "spreadsheet::read_spreadsheet before tokenize" << endl;
373 }
374 tokenize(fname, tokens, nb_tokens, 0 /*verbose_level*/);
375
376 if (f_v) {
377 cout << "spreadsheet::read_spreadsheet read file with "
378 << nb_tokens << " tokens" << endl;
379
380 if (f_vv) {
381 for (i = 0; i < nb_tokens; i++) {
382 cout << setw(6) << i << " : '" << tokens[i] << "'" << endl;
383 }
384 }
385 }
386
387
388
389 if (f_v) {
390 cout << "spreadsheet::read_spreadsheet before find_rows" << endl;
391 }
392 find_rows(0 /*verbose_level - 2*/);
393
394 if (f_v) {
395 cout << "spreadsheet::read_spreadsheet Found "
396 << nb_lines << " lines" << endl;
397 }
398
399 if (f_vv) {
400 {
401 int f, l, j;
402
403 for (i = 0; i < nb_lines; i++) {
404 f = line_start[i];
405 l = line_size[i];
406 cout << "Line " << i << " : ";
407 for (j = 0; j < l; j++) {
408 cout << "'" << tokens[f + j] << "'";
409 if (j < l - 1) {
410 cout << ", ";
411 }
412 }
413 cout << endl;
414 }
415 }
416 }
417
418 int j;
419
421 nb_cols = line_size[0];
423 for (i = 0; i < nb_rows; i++) {
424 for (j = 0; j < nb_cols; j++) {
425 Table[i * nb_cols + j] = -1;
426 }
427 }
428 for (i = 0; i < nb_rows; i++) {
429 for (j = 0; j < MINIMUM(nb_cols, line_size[i]); j++) {
430 Table[i * nb_cols + j] = line_start[i] + j;
431 }
432 }
433
434 if (f_vv) {
435 cout << "spreadsheet::read_spreadsheet" << endl;
436 for (i = 0; i < nb_rows; i++) {
437 for (j = 0; j < nb_cols; j++) {
438 cout << "row " << i << " column " << j << " entry '"
439 << tokens[Table[i * nb_cols + j]] << "'" << endl;
440 }
441 }
442 }
443
444 if (f_v) {
445 cout << "spreadsheet::read_spreadsheet reading file "
446 << fname << " of size " << Fio.file_size(fname)
447 << " done" << endl;
448 }
449
450}
451
452void spreadsheet::print_table(ostream &ost, int f_enclose_in_parentheses)
453{
454 int i;
455
456 //cout << "Table:" << endl;
457 for (i = 0; i < nb_rows; i++) {
458 print_table_row(i, f_enclose_in_parentheses, ost);
459 }
460}
461
463 ostream &ost, int f_enclose_in_parentheses)
464{
465 int i, j;
466 int *f_column_select;
467
468 f_column_select = NEW_int(nb_cols);
469 for (j = 0; j < nb_cols; j++) {
470 f_column_select[j] = TRUE;
471 }
472
473 //cout << "Table:" << endl;
474 ost << "\\begin{tabular}{|c|";
475 for (j = 0; j < nb_cols; j++) {
476 ost << "c|";
477 }
478 ost << "}" << endl;
479 for (i = 0; i < nb_rows; i++) {
481 f_column_select, f_enclose_in_parentheses, ost);
482 }
483 ost << "\\end{tabular}" << endl;
484
485 FREE_int(f_column_select);
486}
487
489 int *f_column_select, int f_enclose_in_parentheses,
490 int nb_lines_per_table)
491{
492 int I, i, j;
493 int nb_r;
494
495
496 nb_r = nb_rows - 1; // take away one because of header
497
498 //cout << "Table:" << endl;
499 for (I = 0; I < (nb_r + nb_lines_per_table - 1) / nb_lines_per_table; I++) {
500 ost << "\\begin{tabular}[t]{|";
501 for (j = 0; j < nb_cols; j++) {
502 if (f_column_select[j]) {
503 ost << "r|";
504 //ost << "p{3cm}|";
505 }
506 }
507 ost << "}" << endl;
508 ost << "\\hline" << endl;
509
511 f_column_select,
512 f_enclose_in_parentheses,
513 ost);
514 ost << "\\hline" << endl;
515 ost << "\\hline" << endl;
516
517
518 for (i = 0; i < nb_lines_per_table; i++) {
519 if (1 + I * nb_lines_per_table + i < nb_rows) {
520 print_table_row_latex(1 + I * nb_lines_per_table + i,
521 f_column_select,
522 f_enclose_in_parentheses,
523 ost);
524 ost << "\\hline" << endl;
525 }
526 }
527 ost << "\\end{tabular}" << endl;
528 }
529}
530
532 int f_enclose_in_parentheses, ostream &ost)
533{
534 int j, t; //, h;
535 int f_enclose;
536
537 //cout << "Row " << row << " : ";
538 for (j = 0; j < nb_cols; j++) {
539 t = Table[row * nb_cols + j];
540 if (t >= 0) {
541#if 0
542 if (row == 0 && j == 0) {
543 cout << "printing token '" << tokens[t] << "'" << endl;
544 for (h = 0; h < 10; h++) {
545 cout << h << " : " << (int) tokens[t][h] << endl;
546 }
547 }
548#endif
549 if (tokens[t][0] == '\"') {
550 f_enclose = FALSE;
551 }
552 else {
553 f_enclose = TRUE;
554 }
555 if (f_enclose) {
556 ost << "\"";
557 }
558 if (tokens[t] == NULL) {
559 cout << "spreadsheet::print_table_row token[t] == NULL, "
560 "t = " << t << endl;
561 }
562 else {
563 ost << tokens[t];
564 }
565 if (f_enclose) {
566 ost << "\"";
567 }
568 }
569 if (j < nb_cols - 1) {
570 ost << ",";
571 }
572 }
573 ost << endl;
574}
575
577 int *f_column_select, int f_enclose_in_parentheses,
578 ostream &ost)
579{
580 int j, t, l; //, h;
581 int f_first = TRUE;
582
583 //cout << "Row " << row << " : ";
584 //ost << row;
585 for (j = 0; j < nb_cols; j++) {
586 if (f_column_select[j]) {
587 if (f_first) {
588 f_first = FALSE;
589 }
590 else {
591 ost << " & ";
592 }
593 t = Table[row * nb_cols + j];
594 if (t >= 0) {
595 #if 0
596 if (row == 0 && j == 0) {
597 cout << "printing token '" << tokens[t] << "'" << endl;
598 for (h = 0; h < 10; h++) {
599 cout << h << " : " << (int) tokens[t][h] << endl;
600 }
601 }
602 #endif
603 if (f_enclose_in_parentheses) {
604 ost << "\"";
605 }
606 if (tokens[t][0] == '"') {
607 tokens[t][0] = ' ';
608 }
609 l = strlen(tokens[t]);
610 if (tokens[t][l - 1] == '"') {
611 tokens[t][l - 1] = ' ';
612 }
613 ost << tokens[t];
614 if (f_enclose_in_parentheses) {
615 ost << "\"";
616 }
617 }
618 }
619 }
620 ost << "\\\\" << endl;
621}
622
623void spreadsheet::print_table_row_detailed(int row, ostream &ost)
624{
625 int j, t;
626
627 ost << "Row " << row << " of the table is:" << endl;
628 for (j = 0; j < nb_cols; j++) {
629 ost << "Column " << j << " / " << nb_cols << " : ";
630 t = Table[row * nb_cols + j];
631 if (t >= 0) {
632 ost << tokens[t];
633 }
634 if (j < nb_cols - 1) {
635 ost << ",";
636 }
637 ost << endl;
638 }
639}
640
642 int f_enclose_in_parentheses,
643 int *Col_selection, int nb_cols_selected, std::ostream &ost)
644{
645 int j, t, h;
646 int f_enclose;
647
648 //cout << "Row " << row << " : ";
649 for (h = 0; h < nb_cols_selected; h++) {
650 j = Col_selection[h] + 1;
651 t = Table[row * nb_cols + j];
652 if (t >= 0) {
653#if 0
654 if (row == 0 && j == 0) {
655 cout << "printing token '" << tokens[t] << "'" << endl;
656 for (h = 0; h < 10; h++) {
657 cout << h << " : " << (int) tokens[t][h] << endl;
658 }
659 }
660#endif
661 if (tokens[t][0] == '\"') {
662 f_enclose = FALSE;
663 }
664 else {
665 f_enclose = TRUE;
666 }
667 if (f_enclose) {
668 ost << "\"";
669 }
670 if (tokens[t] == NULL) {
671 cout << "spreadsheet::print_table_row_with_column_selection token[t] == NULL, "
672 "t = " << t << endl;
673 }
674 else {
675 ost << tokens[t];
676 }
677 if (f_enclose) {
678 ost << "\"";
679 }
680 }
681 if (h < nb_cols_selected - 1) {
682 ost << ",";
683 }
684 }
685 ost << endl;
686}
687
688
690 int *f_selected, ostream &ost)
691{
692 int i;
693
694 //cout << "Table:" << endl;
695 for (i = 0; i < nb_rows; i++) {
696 if (!f_selected[i]) {
697 continue;
698 }
699 print_table_row(i, FALSE, ost);
700 }
701}
702
704 const char *sort_by)
705{
706 int i, t, ii;
707 int idx;
708 int *perm;
709 char **labels;
710 sorting Sorting;
711
712 idx = find_by_column(sort_by);
713 perm = NEW_int(nb_rows - 1);
714 labels = NEW_pchar(nb_rows - 1);
715 for (i = 0; i < nb_rows - 1; i++) {
716 perm[i] = i;
717 t = Table[(i + 1) * nb_cols + idx];
718 if (t >= 0) {
719 if (tokens[t][0] == '"') {
720 labels[i] = NEW_char(strlen(tokens[t]) + 1);
721 strcpy(labels[i], tokens[t] + 1);
722 }
723 else {
724 labels[i] = NEW_char(strlen(tokens[t]) + 1);
725 strcpy(labels[i], tokens[t]);
726 }
727 }
728 else {
729 labels[i] = NEW_char(1);
730 labels[i][0] = 0;
731 }
732 }
733
734 Sorting.quicksort_array_with_perm(nb_rows - 1, (void **) labels, perm,
735 string_tools_compare_strings, NULL /*void *data*/);
736
737
738 //cout << "Table:" << endl;
739 for (i = 0; i < nb_rows; i++) {
740 if (i == 0) {
741 ii = 0;
742 }
743 else {
744 ii = perm[i - 1] + 1;
745 }
746 print_table_row(ii, FALSE, ost);
747 }
748}
749
750
751void spreadsheet::add_column_with_constant_value(const char *label, char *value)
752{
753 int i;
754
756 add_token(label);
757 Table[0 * nb_cols + nb_cols - 1] = nb_tokens - 1;
758 for (i = 1; i < nb_rows; i++) {
759 add_token(value);
760 Table[i * nb_cols + nb_cols - 1] = nb_tokens - 1;
761 }
762
763}
764
765void spreadsheet::add_column_with_int(const char *label, int *Value)
766{
767 int i;
768 char str[1000];
769
771 add_token(label);
772 Table[0 * nb_cols + nb_cols - 1] = nb_tokens - 1;
773 for (i = 1; i < nb_rows; i++) {
774 snprintf(str, 1000, "%d", Value[i - 1]);
775 add_token(str);
776 Table[i * nb_cols + nb_cols - 1] = nb_tokens - 1;
777 }
778
779}
780void spreadsheet::add_column_with_text(const char *label, char **Value)
781{
782 int i;
783
785 add_token(label);
786 Table[0 * nb_cols + nb_cols - 1] = nb_tokens - 1;
787 for (i = 1; i < nb_rows; i++) {
788 add_token(Value[i - 1]);
789 Table[i * nb_cols + nb_cols - 1] = nb_tokens - 1;
790 }
791
792}
793
795{
796 int i, j;
797 int *Table2;
798
799 Table2 = NEW_int(nb_rows * (nb_cols + 1));
800
801 for (i = 0; i < nb_rows; i++) {
802 for (j = 0; j < nb_cols; j++) {
803 Table2[i * (nb_cols + 1) + j] = Table[i * nb_cols + j];
804 }
805 Table2[i * (nb_cols + 1) + nb_cols] = -1;
806 }
808 Table = Table2;
809 nb_cols++;
810}
811
813{
814 int i, j;
815 int *Table2;
816
817 Table2 = NEW_int((nb_rows + 1) * nb_cols);
818
819 for (i = 0; i < nb_rows; i++) {
820 for (j = 0; j < nb_cols; j++) {
821 Table2[i * nb_cols + j] = Table[i * nb_cols + j];
822 }
823 }
824 for (j = 0; j < nb_cols; j++) {
825 Table2[nb_rows * nb_cols + j] = -1;
826 }
828 Table = Table2;
829 nb_rows++;
830}
831
832int spreadsheet::find_column(std::string &column_label)
833{
834 return find_by_column(column_label.c_str());
835}
836
837int spreadsheet::find_by_column(const char *join_by)
838{
839 int j, t, c; //, h;
840
841 for (j = 0; j < nb_cols; j++) {
842 t = Table[0 * nb_cols + j];
843 if (t >= 0) {
844 c = strncmp(tokens[t], join_by, strlen(join_by));
845#if 0
846 cout << "comparing '" << tokens[t] << "' with '"
847 << join_by << "' yields " << c << endl;
848 for (h = 0; h < (int)strlen(join_by); h++) {
849 cout << h << " : " << tokens[t][h] << " : "
850 << join_by[h] << endl;
851 }
852#endif
853 if (c == 0) {
854 return j;
855 }
856 }
857 }
858 // in case we don't find it, maybe it is because the labels
859 //are all encapsulated in \" signs
860 char join_by_in_quotes[1000];
861
862 snprintf(join_by_in_quotes, 1000, "\"%s",join_by);
863 for (j = 0; j < nb_cols; j++) {
864 t = Table[0 * nb_cols + j];
865 if (t >= 0) {
866 c = strncmp(tokens[t], join_by_in_quotes,
867 strlen(join_by_in_quotes));
868#if 0
869 cout << "comparing '" << tokens[t] << "' with '"
870 << join_by << "' yields " << c << endl;
871 for (h = 0; h < (int)strlen(join_by); h++) {
872 cout << h << " : " << (int) tokens[t][h] << " : "
873 << (int) join_by[h] << endl;
874 }
875#endif
876 if (c == 0) {
877 return j;
878 }
879 }
880 }
881 cout << "by column not found, join_by='" << join_by << "'" << endl;
882 cout << "The first row of the table is:" << endl;
884 //print_table(cout);
885 //cout << "by column not found" << endl;
886 exit(1);
887}
888
889void spreadsheet::tokenize(std::string &fname,
890 char **&tokens, int &nb_tokens, int verbose_level)
891{
892 int f_v = (verbose_level >= 1);
893 int f_vv = (verbose_level >= 2);
894 char *buf;
895 const char *p_buf;
896 char *str;
897 int sz;
898 int i; //, r;
900
901 if (f_v) {
902 cout << "spreadsheet::tokenize file=" << fname << endl;
903 cout << "spreadsheet::tokenize verbose_level="
904 << verbose_level << endl;
905 }
906
907 sz = Fio.file_size(fname);
908
909 buf = NEW_char(sz + 1);
910 str = NEW_char(sz + 1);
911
912 {
913 string_tools ST;
914 ifstream fp(fname);
915 i = 0;
916 while (TRUE) {
917 if (fp.eof()) {
918 break;
919 }
920 fp.getline(buf, sz, '\n');
921 if (f_vv) {
922 cout << "Line read :'" << buf << "'" << endl;
923 }
924 p_buf = buf;
925 if (strncmp(buf, "END", 3) == 0) {
926 break;
927 }
928
929 #if 0
930 // delete negative characters:
931 int len = strlen(buf);
932 for (i = 0, j = 0; i < len; i++) {
933 if ((int) buf[i] >= 0) {
934 buf[j++] = buf[i];
935 }
936 else {
937 cout << "spreadsheet::tokenize skipping "
938 "negative character" << endl;
939 }
940 }
941 buf[j] = 0;
942 #endif
943
944 //i = 0;
945 while (TRUE) {
946 if (*p_buf == 0) {
947 break;
948 }
949 //s_scan_token(&p_buf, str);
950 //s_scan_token(&p_buf, str);
951 /* r =*/ ST.s_scan_token_comma_separated(&p_buf, str);
952
953 if (f_vv) {
954 cout << "Token " << setw(6) << i << " is '"
955 << str << "'" << endl;
956 }
957 #if 0
958 if (strcmp(str, ",") == 0) {
959 continue;
960 }
961 #endif
962 i++;
963 }
964 i++; // End of line
965 }
966 }
967 nb_tokens = i;
968
969
970 //f_vv = TRUE;
971
972
974 {
975 string_tools ST;
976 ifstream fp(fname);
977 i = 0;
978 while (TRUE) {
979 if (fp.eof()) {
980 break;
981 }
982 fp.getline(buf, sz, '\n');
983 p_buf = buf;
984 if (strncmp(buf, "END", 3) == 0) {
985 break;
986 }
987 if (f_vv) {
988 cout << "read line '" << p_buf << "'" << " i=" << i << endl;
989 }
990
991 #if 0
992 // delete negative characters:
993 int len = strlen(buf);
994 for (i = 0, j = 0; i < len; i++) {
995 if ((int) buf[i] >= 0) {
996 buf[j++] = buf[i];
997 }
998 else {
999 cout << "spreadsheet::tokenize skipping "
1000 "negative character" << endl;
1001 }
1002 }
1003 buf[j] = 0;
1004 #endif
1005
1006 //i = 0;
1007 while (TRUE) {
1008 if (*p_buf == 0) {
1009 break;
1010 }
1011 //s_scan_token(&p_buf, str);
1012 //s_scan_token(&p_buf, str);
1013 /*r = */ ST.s_scan_token_comma_separated(&p_buf, str);
1014 #if 0
1015 if (strcmp(str, ",") == 0) {
1016 continue;
1017 }
1018 #endif
1019 tokens[i] = NEW_char(strlen(str) + 1);
1020 strcpy(tokens[i], str);
1021 if (f_vv) {
1022 cout << "Token " << setw(6) << i << " is '"
1023 << tokens[i] << "'" << endl;
1024 }
1025 i++;
1026 }
1027
1028 #if 1
1029 snprintf(str, sz, "END_OF_LINE");
1030 tokens[i] = NEW_char(strlen(str) + 1);
1031 strcpy(tokens[i], str);
1032 if (f_vv) {
1033 cout << "Token " << setw(6) << i << " is '"
1034 << tokens[i] << "'" << endl;
1035 }
1036 i++;
1037 #endif
1038
1039 }
1040 }
1041 FREE_char(buf);
1042 FREE_char(str);
1043}
1044
1045void spreadsheet::remove_quotes(int verbose_level)
1046{
1047 int i, j, h, l, t;
1048
1049 for (i = 1; i < nb_rows; i++) {
1050 for (j = 0; j < nb_cols; j++) {
1051 t = Table[i * nb_cols + j];
1052 if (t < 0) {
1053 continue;
1054 }
1055 if (tokens[t][0] == '"') {
1056 l = strlen(tokens[t]);
1057 for (h = 1; h < l; h++) {
1058 tokens[t][h - 1] = tokens[t][h];
1059 }
1060 tokens[t][l - 1] = 0;
1061 }
1062 l = strlen(tokens[t]);
1063 if (l && tokens[t][l - 1] == '"') {
1064 tokens[t][l - 1] = 0;
1065 }
1066 }
1067 }
1068}
1069
1070void spreadsheet::remove_rows(const char *drop_column,
1071 const char *drop_label, int verbose_level)
1072{
1073 int f_v = (verbose_level >= 1);
1074 //int f_vv = (verbose_level >= 2);
1075 int i, j, h, t, idx, nbr, f_delete;
1076
1077 if (f_v) {
1078 cout << "spreadsheet::remove_rows" << endl;
1079 }
1080 nbr = nb_rows;
1081 idx = find_by_column(drop_column);
1082 cout << "drop column is " << idx << endl;
1083 cout << "drop label is " << drop_label << endl;
1084 h = 1;
1085 for (i = 1; i < nb_rows; i++) {
1086 t = Table[i * nb_cols + idx];
1087 if (t >= 0 && strcmp(tokens[t], drop_label) == 0) {
1088 f_delete = TRUE;
1089 }
1090 else {
1091 f_delete = FALSE;
1092 }
1093 if (!f_delete) {
1094 for (j = 0; j < nb_cols; j++) {
1095 Table[h * nb_cols + j] = Table[i * nb_cols + j];
1096 }
1097 h++;
1098 }
1099 }
1100 nb_rows = h;
1101 if (f_v) {
1102 cout << "spreadsheet::remove_rows, removed "
1103 << nbr - nb_rows << " rows" << endl;
1104 }
1105}
1106
1108 const char *drop_column, int verbose_level)
1109{
1110 int f_v = (verbose_level >= 1);
1111 //int f_vv = (verbose_level >= 2);
1112 int i, j, h, t, idx, nbr, f_delete;
1113
1114 if (f_v) {
1115 cout << "spreadsheet::remove_rows_where_field_is_empty" << endl;
1116 }
1117 nbr = nb_rows;
1118 idx = find_by_column(drop_column);
1119 cout << "drop column is " << idx << endl;
1120 h = 1;
1121 for (i = 1; i < nb_rows; i++) {
1122 t = Table[i * nb_cols + idx];
1123 if (t == -1) {
1124 f_delete = TRUE;
1125 }
1126 else if (t >= 0 && strlen(tokens[t]) == 0) {
1127 f_delete = TRUE;
1128 }
1129 else {
1130 f_delete = FALSE;
1131 }
1132 if (!f_delete) {
1133 for (j = 0; j < nb_cols; j++) {
1134 Table[h * nb_cols + j] = Table[i * nb_cols + j];
1135 }
1136 h++;
1137 }
1138 }
1139 nb_rows = h;
1140 if (f_v) {
1141 cout << "spreadsheet::remove_rows_where_field_is_empty, "
1142 "removed " << nbr - nb_rows << " rows" << endl;
1143 }
1144}
1145
1146void spreadsheet::find_rows(int verbose_level)
1147{
1148 int f_v = (verbose_level >= 1);
1149 int f_vv = (verbose_level >= 2);
1150 int i, cnt;
1151
1152 if (f_v) {
1153 cout << "find_rows" << endl;
1154 }
1155 cnt = 0;
1156
1157 for (i = 0; i < nb_tokens; i++) {
1158 if (strcmp(tokens[i], "END_OF_LINE") == 0) {
1159 cnt++;
1160 }
1161 }
1162 nb_lines = cnt;
1165
1166 cnt = 0;
1167
1168 line_start[cnt] = 0;
1169 for (i = 0; i < nb_tokens; i++) {
1170 if (f_vv) {
1171 cout << "cnt=" << cnt << " i=" << i
1172 << " tokens[i]=" << tokens[i] << endl;
1173 }
1174 if (strcmp(tokens[i], "END_OF_LINE") == 0) {
1175 line_size[cnt] = i - line_start[cnt];
1176 if (f_v) {
1177 cout << "end of line" << endl;
1178 }
1179 cnt++;
1180 line_start[cnt] = i + 1;
1181 }
1182 }
1183}
1184
1186 double &val, int &f_NA)
1187{
1188 string str;
1189 string_tools ST;
1190
1191 get_string(str, i, j);
1192 cout << "spreadsheet::get_value_double_or_NA str=" << str << endl;
1193 if (ST.stringcmp(str, "NA") == 0) {
1194 val = 0;
1195 f_NA = TRUE;
1196 }
1197 else {
1198 val = get_double(i, j);
1199 f_NA = FALSE;
1200 }
1201}
1202
1203#if 0
1204void spreadsheet::get_string_entry(std::string &entry, int i, int j)
1205{
1206 char *p;
1207
1208 p = get_string(i, j);
1209 entry.assign(p);
1210 FREE_char(p);
1211}
1212#endif
1213
1214void spreadsheet::get_string(std::string &str, int i, int j)
1215{
1216 int t;
1217 //char *str;
1218 //char *s;
1219
1220 t = Table[i * nb_cols + j];
1221 //cout << "t=" << t << endl;
1222 if (t == -1) {
1223 str.assign("");
1224 //s = NEW_char(1);
1225 //strcpy(s, "");
1226 }
1227 else {
1228 //str = NEW_char(strlen(tokens[t]) + 1);
1229 if (strlen(tokens[t]) >= 2 && tokens[t][0] == '"') {
1230 str.assign(tokens[t] + 1);
1231 //strcpy(str, tokens[t] + 1);
1232 //str[strlen(str) - 1] = 0;
1233 }
1234 else {
1235 str.assign(tokens[t]);
1236 //strcpy(str, tokens[t]);
1237 }
1238
1239 //s = NEW_char(strlen(str) + 1);
1240 //strcpy(s, str);
1241 //FREE_char(str);
1242 }
1243 //return s;
1244}
1245
1246long int spreadsheet::get_int(int i, int j)
1247{
1248 string str;
1249 long int a;
1250 string_tools ST;
1251
1252 get_string(str, i, j);
1253
1254 a = ST.strtolint(str);
1255
1256 return a;
1257}
1258
1259double spreadsheet::get_double(int i, int j)
1260{
1261 string str;
1262 double a;
1263 string_tools ST;
1264
1265 get_string(str, i, j);
1266 a = ST.strtof(str);
1267 return a;
1268}
1269
1271 int by1, int by2, int verbose_level)
1272{
1273 int f_v = (verbose_level >= 1);
1274 int f_vv = (verbose_level >= 2);
1275 int f_v3 = (verbose_level >= 3);
1276 //int by1, by2;
1277 int j1, j2, t1, t2;
1278 int i1, i2;
1279 char *label2;
1280 int tt1, tt2;
1281 int f_need_to_add;
1282 string_tools ST;
1283
1284
1285 if (f_v) {
1286 cout << "spreadsheet::join_with" << endl;
1287 cout << "verbose_level=" << verbose_level << endl;
1288 }
1289 //by1 = find_by_column(join_by);
1290 //by2 = S2->find_by_column(join_by);
1291
1292 if (f_vv) {
1293 cout << "by1=" << by1 << " by2=" << by2 << endl;
1294 }
1295
1296
1297
1298 for (i2 = 1; i2 < S2->nb_rows; i2++) {
1299 char *T2;
1300 t2 = S2->Table[i2 * S2->nb_cols + by2];
1301 if (t2 == -1) {
1302 continue;
1303 }
1304 T2 = S2->tokens[t2];
1305 if (strlen(T2) == 0) {
1306 continue;
1307 }
1308 for (i1 = 1; i1 < nb_rows; i1++) {
1309 if (Table[i1 * nb_cols + by1] == -1) {
1310 continue;
1311 }
1312 //cout << "i1=" << i1 << " label="
1313 //<< tokens[Table[i1 * nb_cols + by1]] << endl;
1315 tokens[Table[i1 * nb_cols + by1]], T2) == 0) {
1316 break;
1317 }
1318 }
1319 if (i1 == nb_rows) {
1320 cout << "adding a row corresponding to " << T2 << endl;
1322 add_token(T2);
1323 Table[i1 * nb_cols + by1] = nb_tokens - 1;
1324 }
1325 }
1326
1327
1328 for (j2 = 0; j2 < S2->nb_cols; j2++) {
1329 if (f_vv) {
1330 cout << "j2=" << j2 << endl;
1331 }
1332 if (j2 == by2) {
1333 continue;
1334 }
1335 t2 = S2->Table[j2];
1336 if (t2 == -1) {
1337 continue;
1338 }
1339 if (f_vv) {
1340 cout << "joining column " << S2->tokens[t2] << endl;
1341 }
1342
1343 for (j1 = 0; j1 < nb_cols; j1++) {
1344 if (j1 == by1) {
1345 continue;
1346 }
1347 t1 = Table[j1];
1348 if (t1 == -1) {
1349 continue;
1350 }
1351 if (ST.strcmp_with_or_without(tokens[t1], S2->tokens[t2]) == 0) {
1352 break;
1353 }
1354 }
1355 if (j1 == nb_cols) {
1356 // reallocate Table
1357 cout << "reallocating table" << endl;
1359 cout << "reallocating table done" << endl;
1360 add_token(S2->tokens[t2]);
1361 Table[0 * nb_cols + j1] = nb_tokens - 1;
1362 cout << "added token " << S2->tokens[t2]
1363 << " as a column heading" << endl;
1364 }
1365 t1 = Table[j1];
1366
1367 if (f_vv) {
1368 cout << "joining columns " << tokens[t1] << " and "
1369 << S2->tokens[t2] << endl;
1370 }
1371
1372 for (i2 = 1; i2 < S2->nb_rows; i2++) {
1373 if (f_v3) {
1374 cout << "i2=" << i2 << endl;
1375 }
1376 tt2 = S2->Table[i2 * S2->nb_cols + j2];
1377 if (f_v3) {
1378 cout << "tt2=" << tt2 << endl;
1379 }
1380 if (tt2 == -1) {
1381 continue;
1382 }
1383 if (S2->Table[i2 * S2->nb_cols + by2] == -1) {
1384 continue;
1385 }
1386 label2 = S2->tokens[S2->Table[i2 * S2->nb_cols + by2]];
1387 if (f_v3) {
1388 cout << "label2='" << label2 << "'" << endl;
1389 }
1390 for (i1 = 1; i1 < nb_rows; i1++) {
1391 if (Table[i1 * nb_cols + by1] == -1) {
1392 continue;
1393 }
1394 //cout << "i1=" << i1 << " label="
1395 //<< tokens[Table[i1 * nb_cols + by1]] << endl;
1397 tokens[Table[i1 * nb_cols + by1]], label2) == 0) {
1398 break;
1399 }
1400 }
1401 if (i1 == nb_rows) {
1402 cout << "entry " << label2 << " not found in "
1403 "first table" << endl;
1404 exit(1);
1405 //reallocate_table_add_row();
1406 //Table[i1 * nb_cols + by1] =
1407 //S2->Table[i2 * S2->nb_cols + by2];
1408 //exit(1);
1409 }
1410 else {
1411 cout << "label2 " << label2 << " found in row "
1412 << i1 << " in first table" << endl;
1413 }
1414 tt1 = Table[i1 * nb_cols + j1];
1415 f_need_to_add = TRUE;
1416 if (tt1 >= 0) {
1417 if (f_v3) {
1418 cout << "i1=" << i1 << " i2=" << i2 << " we have "
1419 << tokens[tt1] << " vs "
1420 << S2->tokens[tt2] << endl;
1421 }
1422 if (ST.strcmp_with_or_without(tokens[tt1],
1423 S2->tokens[tt2]) == 0) {
1424 f_need_to_add = FALSE;
1425 }
1426 }
1427 if (f_v3) {
1428 cout << "f_need_to_add=" << f_need_to_add << endl;
1429 }
1430 if (f_need_to_add) {
1431 if (f_v3) {
1432 cout << "adding token " << S2->tokens[tt2] << endl;
1433 }
1434 add_token(S2->tokens[tt2]);
1435 Table[i1 * nb_cols + j1] = nb_tokens - 1;
1436 if (f_v3) {
1437 cout << "added token " << S2->tokens[tt2]
1438 << " check: " << tokens[Table[i1 * nb_cols + j1]]
1439 << endl;
1440 }
1441 }
1442 else {
1443 if (f_v3) {
1444 cout << "no need to add" << endl;
1445 }
1446 }
1447 } // next i2
1448 }
1449 if (f_v) {
1450 cout << "spreadsheet::join_with done" << endl;
1451 }
1452}
1453
1454void spreadsheet::patch_with(spreadsheet *S2, char *join_by)
1455{
1456 int by1;
1457 int t0, t1, /*t2,*/ t3;
1458 int i1, i2;
1459 int what_idx;
1460 int nb_patch = 0;
1461
1462
1463 by1 = find_by_column(join_by);
1464
1465 cout << "spreadsheet::patch_with by1=" << by1 << endl;
1466 cout << "spreadsheet::patch_with S2->nb_rows=" << S2->nb_rows << endl;
1467
1468
1469
1470 for (i2 = 1; i2 < S2->nb_rows; i2++) {
1471 char *what;
1472 char *who;
1473 char *patch_value;
1474 t0 = S2->Table[i2 * S2->nb_cols + 0];
1475 t1 = S2->Table[i2 * S2->nb_cols + 1];
1476 //t2 = S2->Table[i2 * S2->nb_cols + 2];
1477 t3 = S2->Table[i2 * S2->nb_cols + 3];
1478 if (t0 == -1) {
1479 continue;
1480 }
1481 what = S2->tokens[t0];
1482 if (strlen(what) == 0) {
1483 continue;
1484 }
1485 if (strcmp(what, "-1") == 0) {
1486 break;
1487 }
1488 who = S2->tokens[t1];
1489 if (strlen(who) == 0) {
1490 continue;
1491 }
1492 patch_value = S2->tokens[t3];
1493
1494 for (i1 = 1; i1 < nb_rows; i1++) {
1495 if (Table[i1 * nb_cols + by1] == -1) {
1496 continue;
1497 }
1498 //cout << "i1=" << i1 << " label="
1499 //<< tokens[Table[i1 * nb_cols + by1]] << endl;
1500 if (strcmp(tokens[Table[i1 * nb_cols + by1]], who) == 0) {
1501 break;
1502 }
1503 }
1504 if (i1 == nb_rows) {
1505 cout << "spreadsheet::patch_with Did not find " << who
1506 << " in first table" << endl;
1507 }
1508 else {
1509 what_idx = find_by_column(what);
1510 add_token(patch_value);
1511 Table[i1 * nb_cols + what_idx] = nb_tokens - 1;
1512 cout << "patch " << nb_patch << " applied, " << who
1513 << " now has " << patch_value << " in " << what << endl;
1514 nb_patch++;
1515 }
1516 }
1517 cout << "spreadsheet::patch_with applied " << nb_patch
1518 << " patches" << endl;
1519
1520}
1521
1522
1523
1524//
1525
1526
1527
1528
1529
1530}}}
1531
1532
a collection of functions related to sorted vectors
void quicksort_array_with_perm(int len, void **v, int *perm, int(*compare_func)(void *a, void *b, void *data), void *data)
Definition: sorting.cpp:937
void print_table_row_detailed(int row, std::ostream &ost)
void read_spreadsheet(std::string &fname, int verbose_level)
void fill_column_with_text(int col_idx, const char **text, const char *heading)
void print_table_with_row_selection(int *f_selected, std::ostream &ost)
void init_int_matrix(int nb_rows, int nb_cols, int *A)
void join_with(spreadsheet *S2, int by1, int by2, int verbose_level)
void get_value_double_or_NA(int i, int j, double &val, int &f_NA)
void fill_column_with_row_index(int col_idx, const char *heading)
void print_table_row_latex(int row, int *f_column_select, int f_enclose_in_parentheses, std::ostream &ost)
void print_table_sorted(std::ostream &ost, const char *sort_by)
void tokenize(std::string &fname, char **&tokens, int &nb_tokens, int verbose_level)
void remove_rows(const char *drop_column, const char *drop_label, int verbose_level)
void fill_column_with_lint(int col_idx, long int *data, const char *heading)
void set_entry_lint(int row_idx, int col_idx, long int val)
void print_table_row(int row, int f_enclose_in_parentheses, std::ostream &ost)
void add_column_with_text(const char *label, char **Value)
void remove_rows_where_field_is_empty(const char *drop_column, int verbose_level)
void save(std::string &fname, int verbose_level)
void print_table_latex(std::ostream &ost, int *f_column_select, int f_enclose_in_parentheses, int nb_lines_per_table)
void print_table_latex_all_columns(std::ostream &ost, int f_enclose_in_parentheses)
void print_table_row_with_column_selection(int row, int f_enclose_in_parentheses, int *Col_selection, int nb_cols_selected, std::ostream &ost)
void add_column_with_constant_value(const char *label, char *value)
void init_set_of_sets(set_of_sets *S, int f_make_heading)
Definition: spreadsheet.cpp:59
void fill_column_with_int(int col_idx, int *data, const char *heading)
void fill_entry_with_text(int row_idx, int col_idx, const char *text)
void print_table(std::ostream &ost, int f_enclose_in_parentheses)
void add_column_with_int(const char *label, int *Value)
functions related to strings and character arrays
#define NEW_pchar(n)
Definition: foundations.h:635
#define FREE_pchar(p)
Definition: foundations.h:648
#define MINIMUM(x, y)
Definition: foundations.h:216
#define FREE_int(p)
Definition: foundations.h:640
#define NEW_char(n)
Definition: foundations.h:632
#define NEW_int(n)
Definition: foundations.h:625
#define TRUE
Definition: foundations.h:231
#define FALSE
Definition: foundations.h:234
#define FREE_char(p)
Definition: foundations.h:646
int string_tools_compare_strings(void *a, void *b, void *data)
the orbiter library for the classification of combinatorial objects