/* SOGoToolRemoveDoubles.m - this file is part of SOGo * * Copyright (C) 2009-2020 Inverse inc. * * This file is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * This file is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; see the file COPYING. If not, write to * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ #import #import #import #import #import #import #import #import #import #import "SOGoTool.h" @interface NGVList (RemoveDoubles) - (NSArray *) cardNames; @end @implementation NGVList (RemoveDoubles) - (NSArray *) cardNames { NSEnumerator *cardReferences; NSMutableArray *cardNames; CardElement *currentReference; cardNames = [NSMutableArray array]; cardReferences = [[self cardReferences] objectEnumerator]; while ((currentReference = [cardReferences nextObject])) [cardNames addObject: [currentReference flattenedValuesForKey: @""]]; return cardNames; } @end @interface SOGoToolRemoveDoubles : SOGoTool @end @implementation SOGoToolRemoveDoubles + (NSString *) command { return @"remove-doubles"; } + (NSString *) description { return @"remove duplicate contacts from the specified user addressbook"; } - (void) feedDoubles: (NSMutableDictionary *) doubleEmails withRecord: (NSDictionary *) record andQuickField: (NSString *) field { NSString *recordEmail; NSMutableArray *recordList; /* we want to match the field value case-insensitively */ recordEmail = [[record objectForKey: field] uppercaseString]; if ([recordEmail length]) { recordList = [doubleEmails objectForKey: recordEmail]; if (!recordList) { recordList = [NSMutableArray arrayWithCapacity: 5]; [doubleEmails setObject: recordList forKey: recordEmail]; } [recordList addObject: record]; } } - (void) cleanupSingleRecords: (NSMutableDictionary *) doubleEmails { NSEnumerator *keys; NSString *currentKey; keys = [[doubleEmails allKeys] objectEnumerator]; while ((currentKey = [keys nextObject])) if ([[doubleEmails objectForKey: currentKey] count] < 2) [doubleEmails removeObjectForKey: currentKey]; } - (NSDictionary *) detectDoublesFromRecords: (NSArray *) records withQuickField: (NSString *) quickField { NSMutableDictionary *doubles; unsigned int count, max; doubles = [NSMutableDictionary dictionaryWithCapacity: [records count]]; max = [records count]; for (count = 0; count < max; count++) [self feedDoubles: doubles withRecord: [records objectAtIndex: count] andQuickField: quickField]; [self cleanupSingleRecords: doubles]; return doubles; } - (NSArray *) fetchCardsInListsFromFolder: (GCSFolder *) folder { EOQualifier *qualifier; NSMutableArray *cardsInLists; NSDictionary *currentRecord; NSArray *records; NSEnumerator *recordsEnum; NGVList *list; cardsInLists = [NSMutableArray array]; qualifier = [EOQualifier qualifierWithQualifierFormat: @"c_component = %@", @"vlist"]; records = [folder fetchFields: [NSArray arrayWithObject: @"c_content"] matchingQualifier: qualifier]; recordsEnum = [records objectEnumerator]; while ((currentRecord = [recordsEnum nextObject])) { list = [NGVList parseSingleFromSource: [currentRecord objectForKey: @"c_content"]]; [cardsInLists addObjectsFromArray: [list cardNames]]; } return cardsInLists; } - (void) removeRecord: (NSString *) recordName fromTable: (NSString *) tableName andQuickTable: (NSString *) quickTableName usingChannel: (EOAdaptorChannel *) channel { NSString *delSql; NSCalendarDate *now; /* We remove the records without regards to c_deleted because we really want to recover table space. */ now = [NSCalendarDate date]; delSql = [NSString stringWithFormat: @"UPDATE %@" @" SET c_deleted = 1, c_lastmodified = %lu," @" c_content = ''" @" WHERE c_name = '%@'", tableName, (NSUInteger) [now timeIntervalSince1970], recordName]; [channel evaluateExpressionX: delSql]; delSql = [NSString stringWithFormat: @"DELETE FROM %@" @" WHERE c_name = '%@'", quickTableName, recordName]; [channel evaluateExpressionX: delSql]; } - (void) removeRecords: (NSArray *) recordNames fromFolder: (GCSFolder *) folder { EOAdaptorChannel *channel; EOAdaptorContext *context; NSString *tableName, *quickTableName, *currentRecordName; NSEnumerator *recordsEnum; fprintf (stderr, #if GS_64BIT_OLD "Removing %d records...\n", #else "Removing %ld records...\n", #endif [recordNames count]); channel = [folder acquireStoreChannel]; context = [channel adaptorContext]; [context beginTransaction]; tableName = [folder storeTableName]; quickTableName = [folder quickTableName]; recordsEnum = [recordNames objectEnumerator]; while ((currentRecordName = [recordsEnum nextObject])) [self removeRecord: currentRecordName fromTable: tableName andQuickTable: quickTableName usingChannel: channel]; [context commitTransaction]; [folder releaseChannel: channel immediately: YES]; } - (NSArray *) namesOfRecords: (NSArray *) records differentFrom: (unsigned int) keptRecord count: (unsigned int) max { NSMutableArray *recordsToRemove; NSDictionary *currentRecord; unsigned int count; recordsToRemove = [NSMutableArray arrayWithCapacity: (max - 1)]; for (count = 0; count < max; count++) { if (count != keptRecord) { currentRecord = [records objectAtIndex: count]; [recordsToRemove addObject: [currentRecord objectForKey: @"c_name"]]; } } return recordsToRemove; } - (NSArray *) records: (NSArray *) records withLowestScores: (unsigned int *) scores count: (unsigned int) max { unsigned int count, highestScore; int highestScoreRecord; highestScore = 0; highestScoreRecord = -1; for (count = 0; count < max; count++) { if (scores[count] > highestScore) { highestScore = scores[count]; highestScoreRecord = count; } } if (highestScoreRecord == -1) highestScoreRecord = 0; return [self namesOfRecords: records differentFrom: highestScoreRecord count: max]; } - (int) mostModifiedRecord: (NSArray *) records count: (unsigned int) max { unsigned int mostModified, count, highestVersion, version; NSNumber *currentVersion; mostModified = 0; highestVersion = 0; for (count = 0; count < max; count++) { currentVersion = [[records objectAtIndex: count] objectForKey: @"c_version"]; version = [currentVersion intValue]; if (version > highestVersion) { mostModified = count; highestVersion = version; } } return mostModified; } - (int) amountOfFilledQuickFields: (NSDictionary *) record { static NSArray *quickFields = nil; id value; int amount, count, max; amount = 0; if (!quickFields) { quickFields = [NSArray arrayWithObjects: @"c_givenname", @"c_cn", @"c_sn", @"c_screenname", @"c_l", @"c_mail", @"c_o", @"c_ou", @"c_telephonenumber", nil]; [quickFields retain]; } max = [quickFields count]; for (count = 0; count < max; count++) { value = [record objectForKey: [quickFields objectAtIndex: count]]; if ([value isKindOfClass: [NSString class]]) { if ([value length]) amount++; } else if ([value isKindOfClass: [NSNumber class]]) amount++; } return amount; } - (int) recordWithTheMostQuickFields: (NSArray *) records count: (unsigned int) max { int mostQuickFields, count, highestQFields, currentQFields; mostQuickFields = 0; highestQFields = 0; for (count = 0; count < max; count++) { currentQFields = [self amountOfFilledQuickFields: [records objectAtIndex: count]]; if (currentQFields > highestQFields) { mostQuickFields = count; highestQFields = currentQFields; } } return mostQuickFields; } - (int) linesInContent: (NSString *) content { int nbrLines; NSArray *lines; lines = [content componentsSeparatedByString: @"\n"]; nbrLines = [lines count]; /* sometimes the end line will finish with a CRLF, we fix this */ if (![[lines objectAtIndex: nbrLines - 1] length]) nbrLines--; return nbrLines; } - (int) mostCompleteRecord: (NSArray *) records count: (unsigned int) max { int mostComplete, count, highestLines, lines; NSString *content; mostComplete = 0; highestLines = 0; for (count = 0; count < max; count++) { content = [[records objectAtIndex: count] objectForKey: @"c_content"]; lines = [self linesInContent: content]; if (lines > highestLines) { mostComplete = count; highestLines = lines; } } return mostComplete; } - (int) record: (NSArray *) records referencedInLists: (NSArray *) cardsInLists { int recordIndex, count, max; NSDictionary *currentRecord; recordIndex = -1; max = [records count]; count = 0; while (recordIndex == -1 && count < max) { currentRecord = [records objectAtIndex: count]; if ([cardsInLists containsObject: [currentRecord objectForKey: @"c_name"]]) recordIndex = count; else count++; } return recordIndex; } - (void) assignScores: (unsigned int *) scores toRecords: (NSArray *) records count: (unsigned int) max withCardsInLists: (NSArray *) cardsInLists { /* Records is an Array of record which are duplicates of each other. The goal here is to know which one to keep and whoch ones to discard. We will assign a score to each record, the one with the best scores is kept Record which has been the last modified: +1 Record has the most content: +2 Record has the most quick field set: +3 Record is in a list: +6 If two record have the same, for exemple, content. It's the first one on the list that will get the points. If two recors have the same score. t's the first one on the list that will get the points. quick fiels are =("c_givenname": Firstname, @"c_cn": Display, @"c_sn": LastName, @"c_screenname": Screen Name @"c_l": City, @"c_mail": mails, @"c_o": organisation, @"c_ou": organisation unit, @"c_telephonenumber": telephone) */ int recordIndex; recordIndex = [self mostModifiedRecord: records count: max]; (*(scores + recordIndex))++; recordIndex = [self mostCompleteRecord: records count: max]; (*(scores + recordIndex)) += 2; recordIndex = [self recordWithTheMostQuickFields: records count: max]; (*(scores + recordIndex)) += 3; /* TODO: this method is ugly. Instead of replacing the card references in the list with the most useful one, we remove the cards that are not mentionned in the list. */ recordIndex = [self record: records referencedInLists: cardsInLists]; if (recordIndex > -1) (*(scores + recordIndex)) += 6; } - (NSArray *) detectRecordsToRemove: (NSDictionary *) records withCardsInLists: (NSArray *) cardsInLists { NSMutableArray *recordsToRemove; NSEnumerator *recordsEnum; NSArray *currentRecords; unsigned int *scores, max; recordsToRemove = [NSMutableArray arrayWithCapacity: [records count] * 4]; recordsEnum = [[records allValues] objectEnumerator]; while ((currentRecords = [recordsEnum nextObject])) { max = [currentRecords count]; scores = NSZoneCalloc (NULL, max, sizeof (unsigned int)); [self assignScores: scores toRecords: currentRecords count: max withCardsInLists: cardsInLists]; [recordsToRemove addObjectsFromArray: [self records: currentRecords withLowestScores: scores count: max]]; NSZoneFree (NULL, scores); } return recordsToRemove; } - (BOOL) removeDoublesFromFolder: (GCSFolder *) folder { NSArray *fields, *records, *recordsToRemove; NSMutableDictionary *doubles; EOQualifier *qualifier; BOOL rc; fields = [NSArray arrayWithObjects: @"c_name", @"c_givenname", @"c_cn", @"c_sn", @"c_screenname", @"c_l", @"c_mail", @"c_o", @"c_ou", @"c_telephonenumber", @"c_content", @"c_version", @"c_creationdate", @"c_lastmodified", nil]; qualifier = [EOQualifier qualifierWithQualifierFormat: @"c_component = %@", @"vcard"]; records = [folder fetchFields: fields matchingQualifier: qualifier]; if (records) { rc = YES; doubles = [NSMutableDictionary dictionary]; [doubles addEntriesFromDictionary: [self detectDoublesFromRecords: records withQuickField: @"c_mail"]]; [doubles addEntriesFromDictionary: [self detectDoublesFromRecords: records withQuickField: @"c_cn"]]; recordsToRemove = [self detectRecordsToRemove: doubles withCardsInLists: [self fetchCardsInListsFromFolder: folder]]; if ([recordsToRemove count]) { [self removeRecords: recordsToRemove fromFolder: folder]; fprintf (stderr, #if GS_64BIT_OLD "Removed %d records from %d.\n", #else "Removed %ld records from %ld.\n", #endif [recordsToRemove count], [records count]); } else fprintf (stderr, #if GS_64BIT_OLD "No record to remove. %d records kept.\n", #else "No record to remove. %ld records kept.\n", #endif [records count]); } else { fprintf (stderr, "Unable to fetch required fields from folder.\n"); rc = NO; } return rc; } - (BOOL) processFolder: (NSString *) folderId ofUser: (NSString *) username withFoM: (GCSFolderManager *) fom { NSString *folderPath; GCSFolder *folder; BOOL rc; folderPath = [NSString stringWithFormat: @"/Users/%@/Contacts/%@", username, folderId]; folder = [fom folderAtPath: folderPath]; if (folder) rc = [self removeDoublesFromFolder: folder]; else { fprintf (stderr, "Folder '%s' of user '%s' not found.\n", [folderId UTF8String], [username UTF8String]); rc = NO; } return rc; } - (BOOL) runWithFolder: (NSString *) folder andUser: (NSString *) username { GCSFolderManager *fom; BOOL rc; fom = [GCSFolderManager defaultFolderManager]; if (fom) rc = [self processFolder: folder ofUser: username withFoM: fom]; else rc = NO; return rc; } - (void) usage { fprintf (stderr, "Usage: remove-doubles USER FOLDER\n\n" " USER the owner of the contact folder\n" " FOLDER the id of the folder to clean up\n"); } - (BOOL) run { BOOL rc; if ([arguments count] == 2) rc = [self runWithFolder: [arguments objectAtIndex: 1] andUser: [arguments objectAtIndex: 0]]; else { [self usage]; rc = NO; } return rc; } @end