vcf_cleaner/vcf_cleaner.py
2025-05-08 10:17:40 +02:00

89 lines
3.4 KiB
Python

import os
from re import escape
import sys
import quopri
import base64
import pickle
import phonenumbers
import pythonvCard4
from pythonvCard4.vcard import Contact
# input should be a valid .vcf file, output in current dir

# When True, every picture of a contact that carries several of them is also
# dumped under multiple_pictures/<full name>/ for a manual check. The cleaned
# card keeps the first picture either way.
check_for_multiple_pictures = False


def decode_qp(text: str) -> str:
    """Decode a quoted-printable string and return the result as UTF-8 text.

    The input file is read as latin-1, so a ``str`` is encoded back to latin-1
    first: that recovers the bytes that were on disk. Passing a non-ASCII
    ``str`` straight to ``quopri.decodestring`` raises ``ValueError``.

    Raises:
        UnicodeDecodeError: if the decoded bytes are not valid UTF-8.
    """
    raw = text.encode("latin-1") if isinstance(text, str) else text
    return quopri.decodestring(raw).decode()


def split_cards(lines):
    """Yield the text of each vCard found in *lines*.

    A card is everything up to and including a line containing ``END:VCARD``.
    Trailing lines that are not closed by such a marker are dropped.
    """
    pending = []
    for line in lines:
        pending.append(line)
        if "END:VCARD" in line:
            yield "".join(pending)
            pending = []


def decode_custom_fields(custom: dict) -> None:
    """Quoted-printable-decode every value of every custom field, in place.

    Each field keeps its list shape: a field with several values still has
    several values afterwards. A field holding a bare string is decoded as
    one value.
    """
    # list(custom) snapshots the keys; only existing keys are reassigned.
    for field in list(custom):
        values = custom[field]
        if isinstance(values, str):
            custom[field] = decode_qp(values)
        else:
            custom[field] = [decode_qp(value) for value in values]


def dump_all_pictures(contact) -> None:
    """Write every picture of a multi-picture contact to disk for review."""
    # TODO : isolate contacts with multiple pictures
    if "PHOTO" not in contact.custom or len(contact.custom["PHOTO"]) <= 1:
        return
    directory = "multiple_pictures/" + contact.fn
    os.makedirs(directory, exist_ok=True)
    for index, photo in enumerate(contact.custom["PHOTO"]):
        print(photo)
        # Write into the directory created above.
        with open(directory + "/" + str(index) + ".jpg", "wb") as f:
            f.write(base64.decodebytes(photo.encode()))


def extract_profile_picture(contact) -> None:
    """Save the contact's first picture and reference it via ``photo_path``.

    Any further pictures are ignored. The ``PHOTO`` custom entry is removed
    from the contact afterwards.
    """
    if "PHOTO" not in contact.custom:
        return
    photos = contact.custom["PHOTO"]
    if photos:  # an empty PHOTO entry has nothing to save
        directory = "pictures/" + contact.fn
        os.makedirs(directory, exist_ok=True)
        picture_path = directory + "/profile.jpg"
        with open(picture_path, "wb") as f:
            f.write(base64.decodebytes(photos[0].encode()))
        contact.photo_path = picture_path
    contact.custom.pop("PHOTO", None)


def clean_phone_numbers(tel, region="FR"):
    """Return the contact's numbers in international format, de-duplicated.

    Numbers that ``phonenumbers`` cannot parse are skipped. *region* is the
    default region used for numbers written without a country prefix.
    """
    # TODO : distinguish phone numbers in final vcard (hint : "type" ?!)
    formatted_numbers = []
    for entry in tel:
        raw = entry["value"].replace("-", "")
        try:
            parsed = phonenumbers.parse(raw, region=region)
        except phonenumbers.phonenumberutil.NumberParseException:
            continue
        formatted = phonenumbers.format_number(
            parsed, phonenumbers.PhoneNumberFormat.INTERNATIONAL
        )
        if formatted not in formatted_numbers:
            formatted_numbers.append(formatted)
    return [{"value": number, "type": []} for number in formatted_numbers]


def clean_contact(contact) -> None:
    """Normalise pictures, phone numbers and text fields of *contact* in place."""
    # Pictures are handled before the name is decoded, so the picture
    # directories are named after the full name as stored in the card.
    if check_for_multiple_pictures:
        dump_all_pictures(contact)
    # choose the first picture and ignore the others
    extract_profile_picture(contact)

    # reformat phone numbers to international
    contact.tel = clean_phone_numbers(contact.tel)

    # full-name reencoding
    contact.fn = decode_qp(contact.fn)
    contact.custom.pop("FN", None)

    # name list reencoding
    contact.n = [decode_qp(name) for name in contact.n]
    contact.custom.pop("N", None)

    # nickname reencoding (the decoded list is stored back on the contact)
    contact.nickname = [decode_qp(nick) for nick in contact.nickname]

    # custom field reencoding
    decode_custom_fields(contact.custom)

    if contact.note:
        contact.note = decode_qp(contact.note)


def main() -> None:
    """Clean the .vcf file named on the command line into ./output.vcf."""
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} INPUT.vcf")
    # Both files are closed on exit, including on error. The output encoding
    # is explicit so decoded non-ASCII text does not depend on the locale.
    with open(sys.argv[1], 'r', encoding="latin-1") as source, \
            open('output.vcf', 'w', encoding="utf-8") as output:
        # a contact is defined by its END:VCARD markup
        for card_text in split_cards(source):
            # the pythonvCard4 parsing assumes a LOT of things
            contact = Contact.from_vcard(card_text)
            clean_contact(contact)
            # rewrite contact as vcard
            output.write(contact.to_vcard())


if __name__ == "__main__":
    main()