/* Protects and reconstructs files by storing with redundancy.
   Copyright (C) 2004 Christopher J. Hazard

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

/* Written by Chris Hazard. */

/* Developer notes:
   Utility to protect files against corruption due to bad disks/cd's/flash
   drives/hard disks.  It can be used in a similar fashion to gzip, but it
   doesn't have a lot of command-line power yet (I'll add that if people
   start using it).  It stores 3 copies of the file in one file, and when
   extracting, it votes on the results, breaking it down to bits when need
   be.  Most error correction methods are geared for errors on the size of
   a few bits, and thus don't do well when blocks fail on drives.  This
   program can drastically improve your chances of correctly transferring
   or storing a file.

   If you make changes to it, it'd be great if you could e-mail me and let
   me know. */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*size that the data is chunked into for processing*/
#define BLOCKSIZE 1024

/*increase this for more redundancy.
  number of times the file is written to the protection file*/
#define NUM_SEGMENTS 3

/*text written after every copy of the data; its arguments are the segment
  number, the length of the file name and the file name, each given twice*/
const char segment_divider[] = "protect-segment-a%03d file(%d):%s protect-segment-b%03d file(%d):%s ";

/*the divider is followed by a fixed-width decimal field holding the total
  trailer length (divider text plus this field)*/
#define SEGMENT_DIVIDER_LENGTH_LENGTH 5
const char segment_divider_length[] = "%05d";

/*largest trailer length representable in the fixed-width length field*/
#define MAX_TRAILER_LENGTH 99999

#if BLOCKSIZE <= SEGMENT_DIVIDER_LENGTH_LENGTH
#error "BLOCKSIZE must be larger than SEGMENT_DIVIDER_LENGTH_LENGTH"
#endif

/*used to contain the data to be verified; one row per copy of the data.
  after voting, row 0 holds the accepted result*/
char buffer[NUM_SEGMENTS][BLOCKSIZE];

/*error counts, reset at the start of every deprotect() call*/
int num_block_errors;
int num_byte_errors;
int num_bit_errors;

/*Writes NUM_SEGMENTS copies of the file `filename` into "<filename>.prot",
  each copy followed by a trailer made of the divider text and the
  fixed-width trailer length.

  Failures (unreadable input, output that cannot be created or written,
  a name too long for the trailer length field, out of memory) are reported
  on stderr; a partially written output file is removed.*/
void protect(char *filename)
{
	char block[BLOCKSIZE];
	char *out_name = NULL;
	char *trailer = NULL;
	FILE *inf = NULL;
	FILE *outf = NULL;
	size_t name_len;
	size_t num_bytes;
	int segment;
	int divider_len;
	int ok = 0;

	/*the name is embedded in the trailer, whose length must fit in the
	  length field; this also keeps the (int) conversions below safe*/
	name_len = strlen(filename);
	if(name_len > MAX_TRAILER_LENGTH)
	{
		fprintf(stderr, "%s: file name too long to protect\n", filename);
		return;
	}

	inf = fopen(filename, "rb");
	if(inf == NULL)
	{
		perror(filename);
		return;
	}

	out_name = malloc(name_len + sizeof ".prot");
	trailer = malloc(MAX_TRAILER_LENGTH + 1);
	if(out_name == NULL || trailer == NULL)
	{
		fprintf(stderr, "%s: out of memory\n", filename);
		goto done;
	}
	memcpy(out_name, filename, name_len);
	memcpy(out_name + name_len, ".prot", sizeof ".prot");

	outf = fopen(out_name, "wb");
	if(outf == NULL)
	{
		perror(out_name);
		goto done;
	}

	/*write out NUM_SEGMENTS copies of the file, with divider between them*/
	for(segment = 0; segment < NUM_SEGMENTS; segment++)
	{
		rewind(inf);
		while((num_bytes = fread(block, 1, BLOCKSIZE, inf)) > 0)
		{
			if(fwrite(block, 1, num_bytes, outf) != num_bytes)
			{
				perror(out_name);
				goto done;
			}
		}
		if(ferror(inf))
		{
			perror(filename);
			goto done;
		}

		divider_len = snprintf(trailer, MAX_TRAILER_LENGTH + 1, segment_divider,
		                       segment, (int)name_len, filename,
		                       segment, (int)name_len, filename);
		if(divider_len < 0
		   || divider_len > MAX_TRAILER_LENGTH - SEGMENT_DIVIDER_LENGTH_LENGTH)
		{
			fprintf(stderr, "%s: file name too long to protect\n", filename);
			goto done;
		}

		/*the stored length counts the divider text plus the length field*/
		snprintf(trailer + divider_len, SEGMENT_DIVIDER_LENGTH_LENGTH + 1,
		         segment_divider_length,
		         divider_len + SEGMENT_DIVIDER_LENGTH_LENGTH);
		num_bytes = (size_t)divider_len + SEGMENT_DIVIDER_LENGTH_LENGTH;
		if(fwrite(trailer, 1, num_bytes, outf) != num_bytes)
		{
			perror(out_name);
			goto done;
		}
	}
	ok = 1;

done:
	if(outf != NULL)
	{
		/*fclose flushes, so a failure here means the output is incomplete*/
		if(fclose(outf) != 0 && ok)
		{
			perror(out_name);
			ok = 0;
		}
		if(!ok)
			remove(out_name);
	}
	fclose(inf);
	free(trailer);
	free(out_name);
}

/*returns the bits of buffer[segment][offset] selected by bitmask, as a
  value in 0..255 regardless of the signedness of char*/
static int masked_value(int segment, int offset, int bitmask)
{
	return (unsigned char)buffer[segment][offset] & bitmask;
}

/*replaces buffer[0][offset] with the voted result, only checking and
  overwriting those bits specified by bitmask (only the low 8 bits of
  bitmask are used).

  returns 1 when one value has strictly more votes than every other value
  and has been stored, 0 when the vote is tied (buffer[0] is left alone)*/
int ensure_offset_bitmask(int offset, int bitmask)
{
	/*contains number of votes for each value (index = value); only the
	  entries for values that actually occur are initialised and read*/
	int value_count[256];
	int segment;
	int value;
	int best_value;
	int runner_up_votes = 0;
	unsigned char merged;

	bitmask &= 0xFF;

	for(segment = 0; segment < NUM_SEGMENTS; segment++)
		value_count[masked_value(segment, offset, bitmask)] = 0;
	for(segment = 0; segment < NUM_SEGMENTS; segment++)
		value_count[masked_value(segment, offset, bitmask)]++;

	/*find the most popular value*/
	best_value = masked_value(0, offset, bitmask);
	for(segment = 1; segment < NUM_SEGMENTS; segment++)
	{
		value = masked_value(segment, offset, bitmask);
		if(value_count[value] > value_count[best_value])
			best_value = value;
	}

	/*find the highest vote count among all the other values; every segment
	  is examined so a tie is detected for any NUM_SEGMENTS*/
	for(segment = 0; segment < NUM_SEGMENTS; segment++)
	{
		value = masked_value(segment, offset, bitmask);
		if(value != best_value && value_count[value] > runner_up_votes)
			runner_up_votes = value_count[value];
	}

	if(value_count[best_value] <= runner_up_votes)
		return 0; /*failed: no clear winner*/

	merged = (unsigned char)(((unsigned char)buffer[0][offset] & ~bitmask)
	                         | best_value);
	buffer[0][offset] = (char)merged;
	return 1;
}

/*returns 1 when every segment holds the same byte at offset, else 0*/
static int bytes_agree(int offset)
{
	int segment;

	for(segment = 0; segment < NUM_SEGMENTS - 1; segment++)
		if(buffer[segment][offset] != buffer[segment + 1][offset])
			return 0;
	return 1;
}

/*uses voting methods to put most likely copy into buffer[0].

  compares the first block_size bytes of every row of buffer.  counts one
  block error if any rows differ, one byte error per differing byte, and
  one bit error per byte whose whole-byte vote was tied and therefore had
  to be settled bit by bit*/
void ensure_block(int block_size)
{
	int segment;
	int offset;
	int bit;
	int all_good = 1;

	/*check if all buffers match, if so, return*/
	for(segment = 0; segment < NUM_SEGMENTS - 1; segment++)
	{
		if(memcmp(buffer[segment], buffer[segment + 1], (size_t)block_size) != 0)
		{
			all_good = 0;
			break;
		}
	}
	if(all_good)
		return;

	/*something didn't match in this current block*/
	num_block_errors++;

	/*check on a byte-by-byte level, seeing which bytes match*/
	for(offset = 0; offset < block_size; offset++)
	{
		if(bytes_agree(offset))
			continue;

		/*discrepancy in this current byte*/
		num_byte_errors++;

		/*vote on byte*/
		if(ensure_offset_bitmask(offset, 0xFF))
			continue;

		/*vote on byte failed, so vote on bits individually*/
		num_bit_errors++;
		for(bit = 0; bit < 8; bit++)
			ensure_offset_bitmask(offset, 1 << bit);
	}
}

/*parses the fixed-width decimal trailer length; returns -1 unless the
  field consists of exactly SEGMENT_DIVIDER_LENGTH_LENGTH digits*/
static long parse_length_field(const char *field)
{
	long value = 0;
	int i;

	for(i = 0; i < SEGMENT_DIVIDER_LENGTH_LENGTH; i++)
	{
		if(field[i] < '0' || field[i] > '9')
			return -1;
		value = value * 10 + (field[i] - '0');
	}
	return value;
}

/*Reconstructs the original data from the protected file `filename` by
  voting between its NUM_SEGMENTS copies.

  The output is written to `filename` with its ".prot" extension removed,
  or to "<filename>.unprot" when it has no such extension; an existing
  file of that name is overwritten.  num_block_errors, num_byte_errors and
  num_bit_errors are reset and then record what had to be repaired (this
  includes repairs to the trailer length field).

  Failures are reported on stderr.  Data that cannot be read from one copy
  is treated as zero bytes so that the remaining copies can outvote it.*/
void deprotect(char *filename)
{
	FILE *inf[NUM_SEGMENTS];
	FILE *outf = NULL;
	char *out_name = NULL;
	long segment_start[NUM_SEGMENTS];
	long file_size;
	long segment_size;
	long trailer_len;
	long pos;
	size_t name_len;
	size_t block_size;
	size_t got;
	int segment;

	num_block_errors = 0;
	num_byte_errors = 0;
	num_bit_errors = 0;

	for(segment = 0; segment < NUM_SEGMENTS; segment++)
		inf[segment] = NULL;

	inf[0] = fopen(filename, "rb");
	if(inf[0] == NULL)
	{
		perror(filename);
		return;
	}

	/*get file and segment sizes*/
	if(fseek(inf[0], 0, SEEK_END) != 0 || (file_size = ftell(inf[0])) < 0)
	{
		perror(filename);
		goto done;
	}
	segment_size = file_size / NUM_SEGMENTS;
	if(segment_size < SEGMENT_DIVIDER_LENGTH_LENGTH)
	{
		fprintf(stderr, "%s: too small to be a protected file\n", filename);
		goto done;
	}

	/*read the trailer length stored at the end of every segment and vote
	  on it; a copy that cannot be read stays zeroed and loses the vote*/
	for(segment = 0; segment < NUM_SEGMENTS; segment++)
	{
		memset(buffer[segment], 0, SEGMENT_DIVIDER_LENGTH_LENGTH + 1);
		if(fseek(inf[0],
		         (segment + 1) * segment_size - SEGMENT_DIVIDER_LENGTH_LENGTH,
		         SEEK_SET) == 0)
		{
			got = fread(buffer[segment], 1, SEGMENT_DIVIDER_LENGTH_LENGTH, inf[0]);
			if(got < SEGMENT_DIVIDER_LENGTH_LENGTH)
				memset(buffer[segment], 0, SEGMENT_DIVIDER_LENGTH_LENGTH);
		}
		clearerr(inf[0]);
		buffer[segment][SEGMENT_DIVIDER_LENGTH_LENGTH] = '\0';
	}
	ensure_block(SEGMENT_DIVIDER_LENGTH_LENGTH);

	trailer_len = parse_length_field(buffer[0]);
	if(trailer_len < SEGMENT_DIVIDER_LENGTH_LENGTH || trailer_len > segment_size)
	{
		fprintf(stderr, "%s: segment trailer is missing or damaged beyond recovery\n",
		        filename);
		goto done;
	}
	segment_size -= trailer_len;

	/*chop off .prot extension, or add .unprot if it doesn't have one*/
	name_len = strlen(filename);
	out_name = malloc(name_len + sizeof ".unprot");
	if(out_name == NULL)
	{
		fprintf(stderr, "%s: out of memory\n", filename);
		goto done;
	}
	memcpy(out_name, filename, name_len + 1);
	if(name_len > 5 && strcmp(out_name + name_len - 5, ".prot") == 0)
		out_name[name_len - 5] = '\0';
	else
		strcat(out_name, ".unprot");

	/***should check to make sure don't clobber new file???*/
	outf = fopen(out_name, "wb");
	if(outf == NULL)
	{
		perror(out_name);
		goto done;
	}

	/*open additional file handles for easier input comparison, and point
	  to starting of each segment*/
	segment_start[0] = 0;
	if(fseek(inf[0], 0, SEEK_SET) != 0)
	{
		perror(filename);
		goto done;
	}
	for(segment = 1; segment < NUM_SEGMENTS; segment++)
	{
		segment_start[segment] = (segment * file_size) / NUM_SEGMENTS;
		inf[segment] = fopen(filename, "rb");
		if(inf[segment] == NULL
		   || fseek(inf[segment], segment_start[segment], SEEK_SET) != 0)
		{
			perror(filename);
			goto done;
		}
	}

	/*merge in all the segments*/
	for(pos = 0; pos < segment_size; pos += (long)block_size)
	{
		block_size = (segment_size - pos > BLOCKSIZE
		              ? (size_t)BLOCKSIZE
		              : (size_t)(segment_size - pos));

		for(segment = 0; segment < NUM_SEGMENTS; segment++)
		{
			got = fread(buffer[segment], 1, block_size, inf[segment]);
			if(got < block_size)
			{
				/*unreadable data must not vote with stale bytes from the
				  previous block; zero it and realign for the next block*/
				memset(buffer[segment] + got, 0, block_size - got);
				clearerr(inf[segment]);
				(void)fseek(inf[segment],
				            segment_start[segment] + pos + (long)block_size,
				            SEEK_SET);
			}
		}

		ensure_block((int)block_size);

		if(fwrite(buffer[0], 1, block_size, outf) != block_size)
		{
			perror(out_name);
			goto done;
		}
	}

done:
	if(outf != NULL && fclose(outf) != 0)
		perror(out_name);
	for(segment = 0; segment < NUM_SEGMENTS; segment++)
		if(inf[segment] != NULL)
			fclose(inf[segment]);
	free(out_name);
}

/*command line: protect [-v] <[-d] file1> <[-d] file2> ...

  -v turns on progress messages for all files that follow it.
  -d makes the next file be deprotected instead of protected; it applies
  to that one file only*/
int main(int argc, char **argv)
{
	int i;
	int verbose = 0;
	int deprot = 0;

	if(argc < 2)
		printf("usage: protect [-v] <[-d] file1> <[-d] file2> ...\n");

	for(i = 1; i < argc; i++)
	{
		if(!strcmp(argv[i], "-v"))
		{
			verbose = 1;
			continue;
		}
		if(!strcmp(argv[i], "-d"))
		{
			deprot = 1;
			continue;
		}

		/*file*/
		if(verbose)
			printf("%sprotecting %s\n", deprot ? "de" : "", argv[i]);

		if(!deprot)
			protect(argv[i]);
		else
		{
			deprotect(argv[i]);
			if(verbose)
				printf("%s: block recovers: %d, byte recovers: %d, bit recovers: %d\n",
				       argv[i], num_block_errors, num_byte_errors, num_bit_errors);
		}

		deprot = 0; /*reset*/
	}

	return 0;
}