Compare commits

...

5 commits

Author SHA1 Message Date
diegantobass
6280f848fe wrap up 2025-05-08 11:14:57 +02:00
diegantobass
3522b03652 clean addresses 2025-05-08 11:08:57 +02:00
diegantobass
585acde380 correct nicknames bug 2025-05-08 10:17:40 +02:00
diegantobass
c481fb90f6 reencode notes and custom fields 2025-05-08 09:59:54 +02:00
diegantobass
c3496a499c comment all code 2025-05-08 09:12:14 +02:00

View file

@ -1,23 +1,32 @@
import os import os
from re import escape
import sys import sys
import quopri import quopri
import base64 import base64
import pickle import pickle
import itertools
import phonenumbers import phonenumbers
import pythonvCard4
from pythonvCard4.vcard import Contact from pythonvCard4.vcard import Contact
# input: path to a valid .vcf file, given as the first command-line argument; output.vcf is written to the current directory
input_file = open(sys.argv[1], 'r', encoding="latin-1").readlines() input_file = open(sys.argv[1], 'r', encoding="latin-1").readlines()
output = open('output.vcf', 'w') output = open('output.vcf', 'w')
# when enabled, this dumps every picture of contacts that have several pictures into a directory for manual checking
# for now the multiples are not resolved: the first picture is always the one that is kept
check_for_multiple_pictures = False check_for_multiple_pictures = False
# lines are accumulated until an END:VCARD line closes the current contact
current_card = "" current_card = ""
for line in input_file: for line in input_file:
current_card += line current_card += line
if "END:VCARD" in line: if "END:VCARD" in line:
# the pythonvCard4 parsing assumes a LOT of things
contact = Contact.from_vcard(current_card) contact = Contact.from_vcard(current_card)
# TODO : isolate contacts with multiple pictures
if check_for_multiple_pictures: if check_for_multiple_pictures:
if "PHOTO" in contact.custom and len(contact.custom["PHOTO"]) > 1: if "PHOTO" in contact.custom and len(contact.custom["PHOTO"]) > 1:
os.makedirs("multiple_pictures/" + contact.fn, exist_ok=True) os.makedirs("multiple_pictures/" + contact.fn, exist_ok=True)
@ -26,15 +35,19 @@ for line in input_file:
with open("photo/" + contact.fn + "/" + str(image) + ".jpg", "wb") as f: with open("photo/" + contact.fn + "/" + str(image) + ".jpg", "wb") as f:
f.write(base64.decodebytes(str.encode(contact.custom["PHOTO"][image]))) f.write(base64.decodebytes(str.encode(contact.custom["PHOTO"][image])))
# choose the first picture and ignore any additional ones
if "PHOTO" in contact.custom: if "PHOTO" in contact.custom:
os.makedirs("pictures/" + contact.fn, exist_ok=True) os.makedirs("pictures/" + contact.fn, exist_ok=True)
with open("pictures/" + contact.fn + "/profile.jpg", "wb") as f: with open("pictures/" + contact.fn + "/profile.jpg", "wb") as f:
f.write(base64.decodebytes(str.encode(contact.custom["PHOTO"][0]))) f.write(base64.decodebytes(str.encode(contact.custom["PHOTO"][0])))
contact.photo_path = "pictures/" + contact.fn + "/profile.jpg" contact.photo_path = "pictures/" + contact.fn + "/profile.jpg"
contact.custom = {} contact.custom.pop("PHOTO", None)
# reformat phone numbers to international format
clean_tel = [] clean_tel = []
for number in contact.tel: for number in contact.tel:
if number["type"] != []:
print(number["type"])
try: try:
number = number["value"] number = number["value"]
number = number.replace("-", "") number = number.replace("-", "")
@ -44,25 +57,45 @@ for line in input_file:
clean_tel.append(number) clean_tel.append(number)
except phonenumbers.phonenumberutil.NumberParseException: except phonenumbers.phonenumberutil.NumberParseException:
continue continue
contact.tel = [{"value": x, "type": []} for x in clean_tel] contact.tel = [{"value": x, "type": []} for x in clean_tel]
# decode the quoted-printable full name (FN)
contact.fn = quopri.decodestring(contact.fn).decode() contact.fn = quopri.decodestring(contact.fn).decode()
contact.custom.pop("FN", None)
# name list reencoding
clean_n = [] clean_n = []
for name in contact.n: for name in contact.n:
clean = quopri.decodestring(name).decode() clean = quopri.decodestring(name).decode()
clean_n.append(clean) clean_n.append(clean)
contact.name = clean_n contact.n = clean_n
print(contact.name) contact.custom.pop("N", None)
# nickname reencoding
clean_nickname = [] clean_nickname = []
for nick in contact.nickname: for nick in contact.nickname:
nick = quopri.decodestring(nick).decode() nick = quopri.decodestring(nick).decode()
clean_n.append(nick) clean_nickname.append(nick)
contact.nickname = clean_nickname
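# custom field re-encoding
# NOTE(review): each decoded value is assigned back to the whole field, so a field with several values ends up holding only its last value as a plain string - confirm this is intended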
for field in contact.custom:
for value in contact.custom[field]:
contact.custom[field] = quopri.decodestring(value).decode()
# notes reencoding
if contact.note:
contact.note = quopri.decodestring(contact.note).decode()
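# re-encode addresses and drop duplicates
# NOTE(review): itertools.groupby only merges adjacent equal entries, so non-consecutive duplicates survive; every address type is also forced to HOME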
if contact.adr != []:
adresses = []
for adresse in contact.adr:
adresses.append([quopri.decodestring(x).decode() for x in adresse["value"]])
clean = list(k for k,_ in itertools.groupby(adresses))
clean = [{"value":x, "type":["HOME"]} for x in clean]
contact.adr = clean
# rewrite contact as vcard
vcf_text = contact.to_vcard() vcf_text = contact.to_vcard()
output.write(vcf_text) output.write(vcf_text)
current_card = "" current_card = ""
continue