# NOTE: this script marks all mails in your mailboxes as read, so be careful.
# The author is not responsible for any data loss or corruption.
# Save this under some filename with a .py extension and run it from the
# command line as filename.py. It is written for Python 3.x.
import getpass
import imaplib
import mailbox
import re
from email.parser import BytesParser
class mbox:
    """Local mbox file that receives the messages of one mail folder.

    The file name is the last '/'-separated component of the name passed
    in, with ".mbox" appended; it is opened relative to the current
    working directory.
    """

    def __init__(self, filename):
        """Open the mbox file derived from *filename*.

        Only the text after the final '/' is used, so "a/b" maps to
        "b.mbox".
        """
        leaf = filename.rsplit('/', 1)[-1]
        self.file_handle = mailbox.mbox(leaf + ".mbox")

    def dump_mails(self, msg_list):
        """Append every message in *msg_list* to the mbox file.

        The file is locked while the messages are written and unlock() is
        always called afterwards, even when adding a message fails.
        """
        store = self.file_handle
        try:
            store.lock()
            for message in msg_list:
                store.add(mailbox.mboxMessage(message))
                # Flush after each message, as the original code did.
                store.flush()
        finally:
            store.unlock()

    def close(self):
        """Close the underlying mbox file."""
        self.file_handle.close()
class gmail:
    """Small convenience wrapper around an ``imaplib.IMAP4_SSL`` session.

    Attributes:
        IMAP_SERVER: host name of the IMAP server.
        IMAP_PORT: port used for the SSL connection.
        M: the ``imaplib.IMAP4_SSL`` connection, or None while disconnected.
        response: data part of the last server reply stored by a method.
        mailboxes: mailbox names found by the last get_mailboxes() call.
        mailcount: message count from the last get_mailcount() call.
    """

    # One line of a LIST reply: (flags) delimiter name
    _LIST_LINE = re.compile(
        r'\((?P<flags>[^)]*)\)\s+(?P<delim>"(?:[^"\\]|\\.)*"|NIL)\s+(?P<name>.+)$'
    )
    # The UNSEEN counter inside a STATUS reply.
    _UNSEEN = re.compile(r"UNSEEN (\d+)")

    def __init__(self):
        self.IMAP_SERVER = 'imap.gmail.com'
        self.IMAP_PORT = 993
        self.M = None
        self.response = None
        self.mailboxes = []
        self.mailcount = None

    @staticmethod
    def _quote(name):
        """Return *name* as an IMAP quoted string.

        imaplib sends mailbox arguments as given, so a name containing a
        space must be quoted by the caller. Names that are not ``str`` or
        that are already wrapped in double quotes are returned unchanged.
        """
        if not isinstance(name, str):
            return name
        if len(name) >= 2 and name.startswith('"') and name.endswith('"'):
            return name
        escaped = name.replace('\\', '\\\\').replace('"', '\\"')
        return '"' + escaped + '"'

    @classmethod
    def _parse_list_name(cls, line):
        """Extract the mailbox name from one decoded LIST reply line.

        Returns None when the line does not look like a LIST reply.
        """
        match = cls._LIST_LINE.match(line)
        if match is None:
            return None
        name = match.group("name").strip()
        if len(name) >= 2 and name[0] == '"' and name[-1] == '"':
            # Quoted string: drop the quotes and undo backslash escapes.
            name = re.sub(r'\\(.)', r'\1', name[1:-1])
        return name

    def connect(self, username, password):
        """Open the SSL connection and log in.

        Returns the status string of the login reply; the reply data is
        kept in ``self.response``.
        """
        self.M = imaplib.IMAP4_SSL(self.IMAP_SERVER, self.IMAP_PORT)
        status, self.response = self.M.login(username, password)
        return status

    def get_mailboxes(self):
        """Return the names of all mailboxes reported by the server.

        The list is rebuilt on every call (it no longer accumulates
        duplicates) and is also stored in ``self.mailboxes``. Reply entries
        that are not bytes, or that cannot be parsed, are skipped.
        """
        rc, self.response = self.M.list()
        names = []
        for item in self.response:
            if not isinstance(item, bytes):
                # e.g. None for an empty listing
                continue
            name = self._parse_list_name(item.decode("utf-8"))
            if name is not None:
                names.append(name)
        self.mailboxes = names
        return self.mailboxes

    def get_mailcount(self, mailbox='inbox'):
        """Select *mailbox* and return its message count as an int.

        Raises:
            ValueError: if the reply data is not a number.
        """
        rc, self.response = self.M.select(self._quote(mailbox))
        self.mailcount = int(self.response[0])
        return self.mailcount

    def get_unread_mailcount(self, mailbox='inbox'):
        """Return the UNSEEN count of *mailbox* as a string of digits.

        Raises:
            imaplib.IMAP4.error: if the reply contains no UNSEEN count.
        """
        rc, message = self.M.status(self._quote(mailbox), "(UNSEEN)")
        first = message[0] if message else None
        if isinstance(first, bytes):
            text = first.decode("utf-8", "replace")
        else:
            text = str(first)
        found = self._UNSEEN.search(text)
        if found is None:
            raise imaplib.IMAP4.error(
                "no UNSEEN count in STATUS reply for %r: %r" % (mailbox, message)
            )
        return found.group(1)

    def rename_mailbox(self, oldmailbox, newmailbox):
        """Rename a mailbox; returns the status string of the reply."""
        rc, self.response = self.M.rename(
            self._quote(oldmailbox), self._quote(newmailbox)
        )
        return rc

    def create_mailbox(self, mailbox):
        """Create a mailbox; returns the status string of the reply."""
        rc, self.response = self.M.create(self._quote(mailbox))
        return rc

    def delete_mailbox(self, mailbox):
        """Delete a mailbox; returns the status string of the reply."""
        rc, self.response = self.M.delete(self._quote(mailbox))
        return rc

    def get_all_mails(self, mailfolder, mark_read=True):
        """Fetch and parse every message in *mailfolder*.

        Args:
            mailfolder: name of the mailbox to select.
            mark_read: when True (the default, and the original behaviour)
                messages are fetched with RFC822, which marks them as read
                on the server, so be careful. Pass False to fetch with
                BODY.PEEK[] and leave the read state untouched.

        Returns:
            A list of parsed message objects, one per message that could
            be fetched. Messages whose fetch reply has no body are skipped.

        Raises:
            imaplib.IMAP4.error: if the mailbox cannot be selected.
        """
        # select specified mailfolder at server
        rc, self.response = self.M.select(mailbox=self._quote(mailfolder))
        if rc != 'OK':
            raise imaplib.IMAP4.error(
                "cannot select mailbox %r: %r" % (mailfolder, self.response)
            )
        # get mail numbers list
        status, data = self.M.search(None, 'ALL')
        numbers = data[0].split() if data and data[0] else []
        fetch_item = '(RFC822)' if mark_read else '(BODY.PEEK[])'
        parser = BytesParser()
        mail_obj_list = []
        for mail_no in numbers:
            status, mail = self.M.fetch(mail_no, fetch_item)
            # A usable reply starts with a (header, body) tuple.
            if status != 'OK' or not mail or not isinstance(mail[0], tuple):
                continue
            mail_obj_list.append(parser.parsebytes(mail[0][1]))
        return mail_obj_list

    def disconnect(self):
        """Log out from the server; does nothing when not connected."""
        if self.M is None:
            return
        try:
            self.M.logout()
        finally:
            self.M = None
#main script starts
# Logs in, lists all mailboxes and copies every mailbox whose name has no
# space into a local "<name>.mbox" file. Requires `getpass` to be imported
# at the top of the file.
gmail_link = gmail()
username = input('Enter username:').strip()
password = getpass.getpass()
print("connecting...")
print(gmail_link.connect(username, password), gmail_link.response)
try:
    print("Retrieving mailboxes...")
    mailboxes = gmail_link.get_mailboxes()
    print("Done...")
    # The loop variable is deliberately NOT called `mailbox`: that name is
    # the stdlib module the mbox class relies on, and rebinding it at
    # module level would break mbox().
    for folder in mailboxes:
        # Names containing a space are skipped, as in the original script
        # (presumably because they would need quoting -- TODO confirm).
        if ' ' in folder:
            continue
        print("Processing mailbox:", folder)
        mailbox_handle = None
        try:
            #get all mails_list present in mailbox
            mails = gmail_link.get_all_mails(mailfolder=folder)
            #create new mailbox(type =mbox) for each folder(mailbox)
            mailbox_handle = mbox(filename=folder)
            #add mails to mailbox created
            mailbox_handle.dump_mails(mails)
            print("Processing mailbox:", folder, "done")
        except Exception as exc:
            # Best effort per mailbox: report the failure and its cause,
            # then carry on with the next one.
            print("Could not retrieve from mailbox:", folder, "-", exc)
        finally:
            # The handle only exists if the failure happened after mbox().
            if mailbox_handle is not None:
                mailbox_handle.close()
finally:
    # Always log out, even when listing or processing was interrupted.
    print("Disconnecting...")
    gmail_link.disconnect()