Orbiter 2022
Combinatorial Objects
tally_vector_data.cpp
Go to the documentation of this file.
1/*
2 * tally_vector_data.cpp
3 *
4 * Created on: May 17, 2019
5 * Author: betten
6 */
7
8
9
10
11
12#include "foundations.h"
13
14using namespace std;
15
16
17namespace orbiter {
18namespace layer1_foundations {
19namespace data_structures {
20
21
22
23static int classify_int_vec_compare_function(void *a, void *b, void *data);
24
26{
27 data_set_sz = 0;
28 data_length = 0;
29 data = NULL;
30 rep_idx = NULL;
31 Reps = NULL;
32 Frequency = NULL;
33 sorting_perm = NULL;
34 sorting_perm_inv = NULL;
35 nb_types = 0;
36 type_first = NULL;
37 Reps_in_lex_order = NULL;
39}
40
41
43{
44 if (rep_idx) {
46 }
47 if (Reps) {
49 }
50 if (Frequency) {
52 }
53 if (sorting_perm) {
55 }
56 if (sorting_perm_inv) {
58 }
59 if (type_first) {
61 }
63 int i;
64
65 for (i = 0; i < nb_types; i++) {
67 }
69 }
72 }
73}
74
75
76void tally_vector_data::init(int *data, int data_length, int data_set_sz,
77 int verbose_level)
78// data[data_length * data_set_sz]
79{
80 int f_v = (verbose_level >= 1);
81 int i;
84
85 if (f_v) {
86 cout << "tally_vector_data::init" << endl;
87 }
91
95 nb_types = 0;
97
98 uint32_t h;
99 int idx, a;
100
101 if (f_v) {
102 cout << "tally_vector_data::init starting to collect data" << endl;
103 }
104 for (i = 0; i < data_length; i++) {
105 if (FALSE) {
106 cout << "tally_vector_data::init starting to collect data i=" << i << " / " << data_length << endl;
107 }
108 if (!hash_and_find(data + i * data_set_sz,
109 idx, h, verbose_level)) {
110 Hashing.insert(pair<uint32_t, int>(h, nb_types));
114 Frequency[nb_types] = 1;
115 rep_idx[i] = nb_types;
116 nb_types++;
117 }
118 else {
119 rep_idx[i] = idx;
120 Frequency[idx]++;
121 }
122 }
123 if (f_v) {
124 cout << "tally_vector_data::init finished collecting data" << endl;
125 }
126
127 for (i = 0; i < nb_types; i++) {
128 if (i == 0) {
129 type_first[i] = 0;
130 }
131 else {
132 type_first[i] = type_first[i - 1] + Frequency[i - 1];
133 }
134 }
135
136 int *Frequency2;
137
140
141 Frequency2 = NEW_int(nb_types);
142 Int_vec_zero(Frequency2, nb_types);
144 for (i = 0; i < data_length; i++) {
145 a = rep_idx[i];
146 sorting_perm_inv[type_first[a] + Frequency2[a]++] = i;
147 }
149
150 FREE_int(Frequency2);
151
152
153 if (f_v) {
154 cout << "tally_vector_data::init computing Reps_in_lex_order" << endl;
155 }
156
159
160 int nb_types2, j;
161
162 nb_types2 = 0;
163 for (i = 0; i < nb_types; i++) {
164 if (Sorting.vec_search((void **)Reps_in_lex_order,
165 classify_int_vec_compare_function, this,
166 nb_types2,
167 Reps + i * data_set_sz,
168 idx,
169 0 /* verbose_level */)) {
170 cout << "tally_vector_data::init error!" << endl;
171 exit(1);
172 }
173 else {
174 for (j = nb_types2; j > idx; j--) {
177 }
182 nb_types2++;
183 }
184 }
185
186
187 if (f_v) {
188 cout << "tally_vector_data::init done" << endl;
189 }
190
191}
192
194 int &idx, uint32_t &h, int verbose_level)
195{
196 int f_found;
199
200
202
203 map<uint32_t, int>::iterator itr, itr1, itr2;
204
205 itr1 = Hashing.lower_bound(h);
206 itr2 = Hashing.upper_bound(h);
207 f_found = FALSE;
208 for (itr = itr1; itr != itr2; ++itr) {
209 idx = itr->second;
210 if (Sorting.int_vec_compare(data,
211 Reps + idx * data_set_sz,
212 data_set_sz) == 0) {
213 f_found = TRUE;
214 break;
215 }
216 }
217 return f_found;
218}
219
220
222{
223 //uint32_t h;
224 int i;
225
226 for (i = 0; i < nb_types; i++) {
227
228 //h = int_vec_hash(Reps + i * data_set_sz, data_set_sz);
229
230 cout << Frequency[i] << " x ";
232 cout << endl;
233#if 0
234 cout << "for elements ";
235 int_vec_print(cout, sorting_perm_inv + type_first[i], Frequency[i]);
236 cout << endl;
237#endif
238 }
239}
240
241void tally_vector_data::save_classes_individually(std::string &fname, int verbose_level)
242{
243 int f_v = (verbose_level >= 1);
244 //uint32_t h;
245 int i, j;
247
248 if (f_v) {
249 cout << "tally_vector_data::save_classes_individually fname = " << fname << endl;
250 }
251 if (f_v) {
252 cout << "tally_vector_data::save_classes_individually nb_types = " << nb_types << endl;
253 }
254 for (i = 0; i < nb_types; i++) {
255
256 string fname2;
257 char str[10000];
258
259 fname2.assign(fname);
260 for (j = 0; j < data_set_sz; j++) {
261 sprintf(str, "%d", Reps[i * data_set_sz + j]);
262 fname2.append(str);
263 }
264 fname2.append(".csv");
265
266 //h = int_vec_hash(Reps + i * data_set_sz, data_set_sz);
267
268 if (f_v) {
269 cout << "tally_vector_data::save_classes_individually saving file = " << fname2 << endl;
270 }
271 Fio.int_vec_write_csv(sorting_perm_inv + type_first[i], Frequency[i], fname2, "case");
272 cout << "Written file " << fname2 << " of size " << Fio.file_size(fname2) << endl;
273 }
274
275 string fname2;
276
277 fname2.assign(fname);
278 fname2.append("_all_in_one.csv");
279 Fio.int_vec_write_csv(sorting_perm_inv, data_length, fname2, "case");
280 cout << "Written file " << fname2 << " of size " << Fio.file_size(fname2) << endl;
281
282
283 if (f_v) {
284 cout << "tally_vector_data::save_classes_individually" << endl;
285 }
286}
287
289 int *&transversal, int *&frequency, int &nb_types, int verbose_level)
290{
291 int f_v = (verbose_level >= 1);
292 int i, f, l;
293
294 if (f_v) {
295 cout << "tally_vector_data::get_transversal" << endl;
296 }
297
299 transversal = NEW_int(nb_types);
300 frequency = NEW_int(nb_types);
301 for (i = 0; i < nb_types; i++) {
302 f = type_first[i];
303 l = Frequency[i];
304 transversal[i] = sorting_perm_inv[f + 0];
305 frequency[i] = l;
306 }
307
308 if (f_v) {
309 cout << "tally_vector_data::get_transversal done" << endl;
310 }
311}
312
314{
315 int f_v = (verbose_level >= 1);
316 int i, j, a, f, l;
317
318 if (f_v) {
319 cout << "tally_vector_data::print_classes_bigger_than_one" << endl;
320 }
321
322 for (i = 0; i < nb_types; i++) {
323 f = type_first[i];
324 l = Frequency[i];
325 if (l > 1) {
326 cout << "class " << i << " has size " << l << " : ";
327 for (j = 0; j < l; j++) {
328 a = sorting_perm_inv[f + j];
329 cout << a;
330 if (j < l - 1) {
331 cout << ", ";
332 }
333 }
334 cout << endl;
335 }
336 }
337
338 if (f_v) {
339 cout << "tally_vector_data::print_classes_bigger_than_one done" << endl;
340 }
341}
342
343
344static int classify_int_vec_compare_function(void *a, void *b, void *data)
345{
347 int *A = (int *) a;
348 int *B = (int *) b;
349 int i;
350
351 for (i = 0; i < C->data_set_sz; i++) {
352 if (A[i] > B[i]) {
353 return 1;
354 }
355 if (A[i] < B[i]) {
356 return -1;
357 }
358 }
359 return 0;
360
361}
362
363}}}
364
365
a catch-all container class for everything related to data structures
a collection of functions related to sorted vectors
int vec_search(void **v, int(*compare_func)(void *a, void *b, void *data), void *data_for_compare, int len, void *a, int &idx, int verbose_level)
Definition: sorting.cpp:945
a statistical analysis of data consisting of vectors of ints
int hash_and_find(int *data, int &idx, uint32_t &h, int verbose_level)
void get_transversal(int *&transversal, int *&frequency, int &nb_types, int verbose_level)
void save_classes_individually(std::string &fname, int verbose_level)
void init(int *data, int data_length, int data_set_sz, int verbose_level)
void int_vec_write_csv(int *v, int len, std::string &fname, const char *label)
Definition: file_io.cpp:1175
#define FREE_int(p)
Definition: foundations.h:640
#define Int_vec_zero(A, B)
Definition: foundations.h:713
#define NEW_pint(n)
Definition: foundations.h:627
#define NEW_int(n)
Definition: foundations.h:625
#define TRUE
Definition: foundations.h:231
#define FALSE
Definition: foundations.h:234
#define Int_vec_copy(A, B, C)
Definition: foundations.h:693
#define FREE_pint(p)
Definition: foundations.h:641
#define Int_vec_print(A, B, C)
Definition: foundations.h:685
the orbiter library for the classification of combinatorial objects