Compare commits

..

5 commits

Author SHA1 Message Date
diegantobass
6280f848fe wrap up 2025-05-08 11:14:57 +02:00
diegantobass
3522b03652 clean addresses 2025-05-08 11:08:57 +02:00
diegantobass
585acde380 correct nicknames bug 2025-05-08 10:17:40 +02:00
diegantobass
c481fb90f6 reencode notes and custom fields 2025-05-08 09:59:54 +02:00
diegantobass
c3496a499c comment all code 2025-05-08 09:12:14 +02:00

View file

@ -1,23 +1,32 @@
import os
from re import escape
import sys
import quopri
import base64
import pickle
import itertools
import phonenumbers
import pythonvCard4
from pythonvCard4.vcard import Contact
# NOTE(review): this listing is a rendered diff view; indentation is stripped,
# so the nesting described in these comments is inferred from the statements
# themselves and should be confirmed against the real file.
#
# Script purpose (from the visible statements): read a .vcf file, parse each
# card with pythonvCard4, clean some fields, and write the cards to output.vcf.
#
# input should be a valid .vcf file whose path is the first CLI argument; it is
# read as latin-1 and loaded entirely as a list of lines. Output goes to
# output.vcf, a relative path (i.e. the current working directory).
# NOTE(review): no close() for either file object is visible in this view.
input_file = open(sys.argv[1], 'r', encoding="latin-1").readlines()
output = open('output.vcf', 'w')
# when True, pictures of contacts that carry more than one PHOTO entry are
# dumped into a directory for manual checking
# the current code ignores the problem and keeps the first picture anyway
check_for_multiple_pictures = False
# a contact is delimited by its END:VCARD line: lines are accumulated into
# current_card until a line containing that marker is seen
current_card = ""
for line in input_file:
current_card += line
# substring test, so any line containing "END:VCARD" closes the current card
if "END:VCARD" in line:
# the pythonvCard4 parsing assumes a LOT of things
contact = Contact.from_vcard(current_card)
# TODO : isolate contacts with multiple pictures
if check_for_multiple_pictures:
# PHOTO entries are looked up in contact.custom, not in a dedicated attribute
if "PHOTO" in contact.custom and len(contact.custom["PHOTO"]) > 1:
# NOTE(review): the directory created here is multiple_pictures/<fn>, while the
# write visible after the next hunk marker targets photo/<fn>; confirm against
# the lines hidden between the hunks.
# NOTE(review): contact.fn is used in the path as-is (before the quoted-printable
# decoding done later) — presumably it never contains path separators; verify.
os.makedirs("multiple_pictures/" + contact.fn, exist_ok=True)
@ -26,15 +35,19 @@ for line in input_file:
# NOTE(review): this fragment starts in the middle of a diff hunk; `image` is
# defined in lines not shown here — presumably an index into
# contact.custom["PHOTO"]; confirm against the full file.
# write one PHOTO entry, base64-decoded, to photo/<fn>/<image>.jpg
with open("photo/" + contact.fn + "/" + str(image) + ".jpg", "wb") as f:
f.write(base64.decodebytes(str.encode(contact.custom["PHOTO"][image])))
# keep the first picture and ignore any additional ones
if "PHOTO" in contact.custom:
os.makedirs("pictures/" + contact.fn, exist_ok=True)
# the first PHOTO entry is base64-decoded and stored as pictures/<fn>/profile.jpg
with open("pictures/" + contact.fn + "/profile.jpg", "wb") as f:
f.write(base64.decodebytes(str.encode(contact.custom["PHOTO"][0])))
# point the contact at the extracted file instead of the inline data
contact.photo_path = "pictures/" + contact.fn + "/profile.jpg"
# NOTE(review): the next two lines look like the removed/added pair of the diff
# (clear every custom field vs. drop only PHOTO). If both really ran, the pop
# would be a no-op on the emptied dict and the later custom-field re-encoding
# would have nothing left to process — confirm which line is current.
contact.custom = {}
contact.custom.pop("PHOTO", None)
# reformat phone numbers to international
clean_tel = []
for number in contact.tel:
# prints any non-empty type list to stdout — presumably a debugging aid
if number["type"] != []:
print(number["type"])
try:
# from here on `number` is the raw value string, no longer the dict entry
number = number["value"]
# strip hyphens; the remaining normalisation steps are hidden by the next hunk marker
number = number.replace("-", "")
@ -44,25 +57,45 @@ for line in input_file:
# NOTE(review): this fragment starts inside the `try` of the phone-number loop;
# the parsing/formatting lines before it are not visible in this view.
clean_tel.append(number)
# numbers that raise NumberParseException are skipped, i.e. dropped from the contact
except phonenumbers.phonenumberutil.NumberParseException:
continue
# rebuild the tel list from the cleaned values; every entry gets an empty type
# list, so the original types are not carried over
contact.tel = [{"value": x, "type": []} for x in clean_tel]
# full-name reencoding: quoted-printable decode, then bytes -> str with the
# default codec (UTF-8)
# NOTE(review): presumably contact.fn is ASCII-only quoted-printable text at
# this point — verify, since the input file is read as latin-1.
contact.fn = quopri.decodestring(contact.fn).decode()
contact.custom.pop("FN", None)
# name list reencoding: each component of contact.n is decoded the same way
clean_n = []
for name in contact.n:
clean = quopri.decodestring(name).decode()
clean_n.append(clean)
# NOTE(review): `contact.name = ...` / `print(contact.name)` and
# `contact.n = ...` look like the removed and added sides of the diff shown
# together — confirm which assignment the current file keeps.
contact.name = clean_n
print(contact.name)
contact.n = clean_n
contact.custom.pop("N", None)
# nickname reencoding
clean_nickname = []
for nick in contact.nickname:
nick = quopri.decodestring(nick).decode()
# NOTE(review): the next three lines mix old and new diff lines (a commit in
# the list above is titled "correct nicknames bug"). Appending to clean_n puts
# nicknames into the name list; appending to clean_nickname is presumably the
# corrected form. Whether contact.nickname is still assigned after the loop in
# the current file cannot be told from this view.
clean_n.append(nick)
contact.nickname = clean_nickname
clean_nickname.append(nick)
# custom field reencoding
for field in contact.custom:
for value in contact.custom[field]:
# NOTE(review): if nested as it reads, each assignment replaces the field's
# whole value with one decoded string, so only the last value of a multi-valued
# field would remain — confirm this is intended.
contact.custom[field] = quopri.decodestring(value).decode()
# notes reencoding (skipped when the note is empty/falsy)
if contact.note:
contact.note = quopri.decodestring(contact.note).decode()
# deduplicate and reencode address
if contact.adr != []:
adresses = []
for adresse in contact.adr:
# decode every component of the address value; the entry's own type is not kept
adresses.append([quopri.decodestring(x).decode() for x in adresse["value"]])
# itertools.groupby only merges runs of equal neighbours, so identical addresses
# that are not adjacent in the list stay duplicated
clean = list(k for k,_ in itertools.groupby(adresses))
# every surviving address is tagged with type ["HOME"]
clean = [{"value":x, "type":["HOME"]} for x in clean]
contact.adr = clean
# rewrite contact as vcard and append it to the output file
vcf_text = contact.to_vcard()
output.write(vcf_text)
# reset the accumulator so the next card starts empty
current_card = ""
continue