Monday, July 3, 2017

Remove SPAM from gmail automatically 6

In this latest version I merged the two main routines into one, so RemoveFromFolder is now the main routine. It is called from RemoveFromTrash or RemoveFromSpam.
The first parameter of RemoveFromFolder selects the folder to scan: either the TRASH folder (sometimes called BIN) or the SPAM folder. The second parameter tells it what to do with a matching mail: delete it permanently or move it to the trash can.
Of course it makes little sense to move mails from the trash can into the trash by using RemoveFromFolder('TRASH','TRASH'). So here is the code...
2017-07-20: minor fix with matchSender


/**
 * Entry point: scans the TRASH folder and permanently deletes every
 * message that RemoveFromFolder classifies as spam.
 */
function RemoveFromTrash() {
  var folderToScan = 'TRASH';
  var whatToDo = 'DELETE';
  RemoveFromFolder(folderToScan, whatToDo);
}

/**
 * Entry point: scans the SPAM folder and permanently deletes every
 * message that RemoveFromFolder classifies as spam.
 */
function RemoveFromSpam() {
  var folderToScan = 'SPAM';
  var whatToDo = 'DELETE';
  RemoveFromFolder(folderToScan, whatToDo);
}

/**
 * Scans every message of every thread in the Gmail TRASH or SPAM folder and
 * removes the messages that hit at least one of the spam rules listed below.
 *
 * Every message is judged on its own; a hit on one message does not condemn
 * the other messages of the same thread. All rule groups are evaluated
 * independently, so one group being present never hides another.
 *
 * @param {string} FolderName 'TRASH' or 'SPAM': the folder to scan.
 * @param {string} ActionType 'DELETE' removes a matching message permanently
 *     through the Gmail advanced service, 'TRASH' moves it to the trash.
 *     Any other value leaves matching messages untouched.
 * @throws {Error} when FolderName is neither 'TRASH' nor 'SPAM'.
 */
function RemoveFromFolder(FolderName,ActionType){
  //true when text contains at least one of the needles as a plain substring
  function containsAny(text,needles){
    for (var k=0;k<needles.length;k++){
      if (text.indexOf(needles[k])>=0){
        return true;
      }
    }
    return false;
  }

  //first match of pattern in text, or "" when there is no match
  function firstMatch(text,pattern){
    var found=text.match(pattern);
    return (found!=null && found.length>0) ? found[0] : "";
  }

  //true when any entry of a String.match() result (may be null) contains needle
  function anyMatchContains(matches,needle){
    if (matches==null){
      return false;
    }
    for (var k=0;k<matches.length;k++){
      if (matches[k].indexOf(needle)!=-1){
        return true;
      }
    }
    return false;
  }

  var threads;
  if (FolderName=='TRASH'){
    threads=GmailApp.getTrashThreads();
  }else if (FolderName=='SPAM'){
    threads=GmailApp.getSpamThreads();
  }else{
    //fail with a clear message instead of crashing on undefined.length
    throw new Error("RemoveFromFolder: unknown FolderName '"+FolderName+"', expected 'TRASH' or 'SPAM'");
  }

  //header patterns: "i" is for case insensitive, "g" returns every match
  var MessageID=RegExp("Message-ID: <.+>","ig");
  var MessageReturnPath=RegExp("Return-Path: <.+>","ig");
  var MessageReceivedFrom=RegExp("Received: from .+ ","ig");
  //a DKIM-Signature is a "tag=value;" list without angle brackets and is
  //usually folded over several lines, so take the folded lines along as well
  var MessageDkimSignature=RegExp("DKIM-Signature: .+(?:\\r\\n[ \\t].*)*","ig");
  var MessageSender=RegExp("Sender: .+","ig");

  //the header and the mail body are separated by an empty line (CR LF CR LF)
  var header_separator=String.fromCharCode(13,10,13,10);

  //All spam rules are in here
  var spam_from,spam_replyto,spam_messageid,spam_dkim_signature,spam_sender,spam_body;
  spam_from=['.gov.cn>','.ru>','.xyz>','.cf>','.za>','.domainandmarket.com>','.com.ec>','.men>','.simplemailwork.com>','.yahoo.co.jp>','.edu>','.edu.tw>','.edu.pl>','.uy>'];
  spam_from=spam_from.concat(['.cesterinevitably.com>','.permanentbrain.com>','.pro>','.cisco.com>','.gate01.com>','.co.com>','.amazonrewardonline.com>','.kingdryer.com>','.life>']);
  spam_from=spam_from.concat(['.perposed.com>','.muysle.us>','.telkomsa.net>','.ocn.ne.jp>','.onmicrosoft.edu>','.cc>','.marektbisniss.com>','.cc>','.offer.com>','emarketingit.com>']);
  spam_from=spam_from.concat(['.ws>','.top>','.ml>','.us>','.ga>','listtopcorp.com>']);
  spam_replyto=['.permanentbrain.com>'];
  spam_messageid=['.gov.cn>','.ru>','.xyz>','.cf>','.za>','.domainandmarket.com>','.probalsa.com.ec>','.men>','.simplemailwork.com>','.edu.tw>','.edu.pl>','.cisco.com>'];
  spam_messageid=spam_messageid.concat(['.gq>','.ml>','.tk>','.px>','.cf>','.ga>','.jp>','.cf>','.cc>','.it>','.id>','.tr>','.gr>','.ec>','.ar>','.pe>','.co>','.ua>','.cr>','.hk>','.bn>','.za>']);
  spam_messageid=spam_messageid.concat(['.in>','.br>','.id>','.it>','.mx>','.cn>','.life>','.atlis1>','.biz>','.glocal.net>','.gov.my>','.local>','.att.net>','.prod.outlook.com>','.onmicrosoft.com>']);
  spam_messageid=spam_messageid.concat(['.trade>','.cisco.com>','.co.in>','.arcamax.com>','.alice.it>','.co.nz>','.gov>','.br>','.telkom.net>','.vevida.net>','.global.net>','SMTPIN_ADDED_MISSING@mx.google.com>']);
  spam_messageid=spam_messageid.concat(['.prod.outlook.com>','.betemail.com>','.cisco.com>','.openasiagroup.com>','.vozoti.us>','.itcrje.us>','.dowjones.net>','@email.amazonses.com>']);
  //'co.id>' was 'co.id' (closing '>' missing), which matched anywhere inside the id
  spam_messageid=spam_messageid.concat(['.localdomain>','.es>','co.id>']);
  //spam_returnpath (not applied yet): 'sendgrid.net>','ikexpress.com>','ddadr.com>','1and1.fr>','.com.co>'
  spam_dkim_signature=['.ccsend.com;'];
  spam_sender=['.mandrillapp.com>'];
  spam_body=['.dma.trade/'];

  for (var i=0;i<threads.length;i++){
    var messages=threads[i].getMessages();
    //for all messages in the thread
    for (var m=0;m<messages.length;m++){
      var message=messages[m];
      //get mail parts that are interesting
      var mail_replyto=message.getReplyTo();
      var mail_from=message.getFrom();
      var mail_body=message.getBody();
      var mail_rawcontent=message.getRawContent();  //the raw email
      var mail_rawcontent_lowercase=mail_rawcontent.toLowerCase();

      //get mail header; without an empty line the whole mail is header
      var header_end=mail_rawcontent.indexOf(header_separator);
      var mail_header=(header_end>=0) ? mail_rawcontent.substr(0,header_end) : mail_rawcontent;

      //list based rules, every value is compared with its OWN spam list
      var is_spam=containsAny(mail_from,spam_from)
        || containsAny(firstMatch(mail_header,MessageID),spam_messageid)
        || containsAny(mail_replyto,spam_replyto)
        || containsAny(firstMatch(mail_header,MessageDkimSignature),spam_dkim_signature)
        || containsAny(firstMatch(mail_header,MessageSender),spam_sender)
        || containsAny(mail_body,spam_body);

      //single rules; match() returns an array, so look inside every match
      //NOTE(review): '.cc' and '.at' are plain substrings, they also hit
      //names such as '.ccsend.com' or 'john.atkins@' - tighten if needed
      is_spam=is_spam
        || anyMatchContains(mail_header.match(MessageReceivedFrom),'.cc')
        || anyMatchContains(mail_header.match(MessageReturnPath),'.at')
        || mail_rawcontent_lowercase.indexOf('<http://bit.ly/')!=-1
        || mail_rawcontent_lowercase.indexOf('<http://tinyurl.com/')!=-1
        || mail_rawcontent.indexOf('.onmicrosoft.com>')!=-1;

      if (is_spam){
        if (ActionType=='DELETE'){
          Gmail.Users.Messages.remove('me', message.getId());
        }else if (ActionType=='TRASH'){
          message.moveToTrash();
        }
      }
      //Received-SPF: softfail (google.com: domain of transitioning info@info.com does not designate 95.0.185.114 as permitted sender) client-ip=95.0.185.114;
      //interesting part is here: google.com: ....  does not designate .... as permitted sender  we can use that to block the spammers as well
    }
  }
}


/**
 * Permanently deletes every thread that GmailApp.search("in:trash") returns.
 *
 * The ids collected here are thread ids, so they are handed to the Threads
 * endpoint of the Gmail advanced service. Passing a thread id to
 * Messages.remove addresses a single message, not the whole thread.
 */
function TrashMail() {
  var threads = GmailApp.search("in:trash");
  for (var i = 0; i < threads.length; i++) {
    Gmail.Users.Threads.remove('me', threads[i].getId());
  }
}


Since Google is moving to OAuth, the procedure will not work without enabling the proper APIs and OAuth settings. Apparently this doesn't affect the working of the script, as far as I can see...

Well, I was wrong about this... It does still work on my account. My mailbox is automatically cleaned according to my filters.

Friday, June 9, 2017

Remove SPAM from gmail automatically 5

The newest version is the beginning of a simple antispam tool. In this latest version I made it simpler to make your own list. With some time left I want to make the whole routine work in such a way that it works on any given folder.
Be that as it may, the folder names of the trash and spam folders are different for each language... I wonder how that will work out.

/**
 * Scans every message of every thread in the Gmail trash and permanently
 * deletes (Gmail advanced service) the messages that hit at least one spam
 * rule: sender, Message-ID and Reply-To lists, a Message-ID domain pattern,
 * and a handful of single header/content checks.
 *
 * Every message is judged on its own and every rule is evaluated
 * independently, so one rule being present never hides another.
 */
function RemoveFromTrash(){
  //true when text contains at least one of the needles as a plain substring
  function containsAny(text,needles){
    for (var k=0;k<needles.length;k++){
      if (text.indexOf(needles[k])>=0){
        return true;
      }
    }
    return false;
  }

  //true when any entry of a String.match() result (may be null) contains needle
  function anyMatchContains(matches,needle){
    if (matches==null){
      return false;
    }
    for (var k=0;k<matches.length;k++){
      if (matches[k].indexOf(needle)!=-1){
        return true;
      }
    }
    return false;
  }

  //header patterns: "i" is for case insensitive, "g" returns every match
  var MessageID=RegExp("Message-ID: <.+>","ig");
  //compiled with "i" (a plain string passed to match() ignores the flags) and
  //with the dots really escaped; not global, so test() keeps no state
  var MessageIDBlock=RegExp("Message-ID: <[a-zA-Z0-9-@.]+(gq|ml|tk|px|cf|ga|jp|cf|cc|it|id|tr|cl|gr|ec|ar|pe|co|ua|cr|hk|bn|za|in|br|id|it|mx|cn|life|atlis1|biz|glocal\\.net|gov\\.my|local|att\\.net|prod\\.outlook\\.com|onmicrosoft\\.com|localdomain|trade|cisco\\.com|co\\.in|arcamax\\.com|alice\\.it|co\\.nz|gov\\.br|telkom\\.net|vevida\\.net|global\\.net|[0-9.]+)>","i");
  var MessageReturnPath=RegExp("Return-Path: <.+>","ig");
  var MessageReceivedFrom=RegExp("Received: from .+ ","ig");

  //the header and the mail body are separated by an empty line (CR LF CR LF)
  var header_separator=String.fromCharCode(13,10,13,10);

  //'.za>' was a bare 'za', which matched any sender containing those letters
  var spam_from=['gov.cn>','.ru>','.xyz>','.cf>','.za>','domainandmarket.com>','probalsa.com.ec>','.men>','simplemailwork.com>','yahoo.co.jp>','.edu>','edu.tw>','edu.pl>'];
  spam_from=spam_from.concat(['cesterinevitably.com>','permanentbrain.com>','pro>']);
  var spam_replyto=['permanentbrain.com>'];
  var spam_messageid=['gov.cn>','.ru>','.xyz>','.cf>','za>','domainandmarket.com>','probalsa.com.ec>','.men>','simplemailwork.com>','edu.tw>','edu.pl>','cisco.com>'];
  spam_messageid=spam_messageid.concat(['.gq>','.ml>','.tk>','.px>','.cf>','.ga>','.jp>','.cf>','.cc>','.it>','.id>','.tr>','.gr>','.ec>','.ar>','.pe>','.co>','.ua>','.cr>','.hk>','.bn>','.za>']);
  spam_messageid=spam_messageid.concat(['in>','br>','id>','it>','mx>','cn>','life>','atlis1>','biz>','glocal.net>','gov.my>','local>','att.net>','prod.outlook.com>','onmicrosoft.com>','localdomain>']);
  spam_messageid=spam_messageid.concat(['trade>','cisco.com>','co.in>','arcamax.com>','alice.it>','co.nz>','gov>','.br>','telkom.net>','vevida.net>','global.net>','SMTPIN_ADDED_MISSING@mx.google.com>']);
  spam_messageid=spam_messageid.concat(['prod.outlook.com>','betemail.com>','cisco.com>']);

  var threads= GmailApp.getTrashThreads();

  for (var i=0;i<threads.length;i++){
    var messages=threads[i].getMessages();
    //for all messages in the thread
    for (var m=0;m<messages.length;m++){
      var message=messages[m];
      var mail_replyto=message.getReplyTo();
      var mail_from=message.getFrom();
      var mail_rawcontent=message.getRawContent();  //the raw email
      var mail_rawcontent_lowercase=mail_rawcontent.toLowerCase();

      //get mail header; without an empty line the whole mail is header
      var header_end=mail_rawcontent.indexOf(header_separator);
      var mail_header=(header_end>=0) ? mail_rawcontent.substr(0,header_end) : mail_rawcontent;

      var matchMessageID=mail_header.match(MessageID);
      var mail_messageid=(matchMessageID!=null) ? matchMessageID[0] : "";

      //list based rules
      var is_spam=containsAny(mail_from,spam_from)
        || containsAny(mail_messageid,spam_messageid)
        || containsAny(mail_replyto,spam_replyto);

      //single rules; match() returns an array, so look inside every match
      //Message-Id: <592b9af3.a284500a.1f2a9.e747SMTPIN_ADDED_MISSING@mx.google.com>
      //NOTE(review): '.cc' and '.at' are plain substrings - tighten if needed
      is_spam=is_spam
        || MessageIDBlock.test(mail_header)
        || mail_messageid.indexOf('SMTPIN_ADDED_MISSING@mx.google.com')!=-1
        || anyMatchContains(mail_header.match(MessageReceivedFrom),'.cc')
        || mail_replyto.indexOf('permanentbrain.com')!=-1
        || anyMatchContains(mail_header.match(MessageReturnPath),'.at')
        || mail_rawcontent_lowercase.indexOf('<http://bit.ly/')!=-1
        || mail_rawcontent_lowercase.indexOf('<http://tinyurl.com/')!=-1
        || mail_rawcontent.indexOf('.onmicrosoft.com>')!=-1;

      if (is_spam){
        Gmail.Users.Messages.remove('me', message.getId());
      }
      //Received-SPF: softfail (google.com: domain of transitioning info@info.com does not designate 95.0.185.114 as permitted sender) client-ip=95.0.185.114;
      //interesting part is here: google.com: ....  does not designate .... as permitted sender  we can use that to block the spammers as well
    }
  }
}


/**
 * Scans every message of every thread in the Gmail SPAM folder and moves the
 * messages that hit at least one rule (sender list, Reply-To list, subject
 * keywords, raw header markers or raw content keywords) to the trash.
 *
 * Every message is judged on its own and every rule is evaluated
 * independently, so a money amount without a currency no longer hides the
 * later "western union" and card checks.
 */
function MoveFromSpamToTrash() {
  //true when text contains at least one of the needles as a plain substring
  function containsAny(text,needles){
    for (var k=0;k<needles.length;k++){
      if (text.indexOf(needles[k])>=0){
        return true;
      }
    }
    return false;
  }

  //the rule lists do not change per mail, so build them once
  //'.za>' was a bare 'za', which matched any sender containing those letters
  var spam_from=['gov.cn>','.ru>','.xyz>','.cf>','.za>','domainandmarket.com>','probalsa.com.ec>','.men>','simplemailwork.com>','yahoo.co.jp>','.edu>','edu.tw>','edu.pl>'];
  spam_from=spam_from.concat(['permanentbrain.com>','cisco.com>']);
  var spam_replyto=['permanentbrain.com>'];
  var spam_subject=['million ',' beneficiary',' nigeria','money gram'];
  //markers that are searched in the raw mail exactly as written
  //doc and docx attachments are not allowed, Roundcube Webmail is a known spam mailer
  var spam_raw=['Return-Path: <>','User-Agent: Roundcube Webmail','X-CTCH-Spam:  Bulk','Received-SPF: softfail','Content-Type: application/msword;'];
  //keywords that are searched in the lowercased raw mail
  var spam_raw_lowercase=['lottery','inheritance','nigeria','bank draft',' western union',' visa ',' master ',' atm card'];
  //an amount only counts together with a currency
  var spam_amount=[' million ',' thousand '];
  var spam_currency=[' usd',' dollar'];

  var threads=GmailApp.getSpamThreads();

  for (var i=0;i<threads.length;i++){
    var messages=threads[i].getMessages();
    for (var m=0;m<messages.length;m++){
      var message=messages[m];
      var mail_replyto=message.getReplyTo();
      var mail_from=message.getFrom();
      var mail_subject=message.getSubject().toLowerCase();
      var mail_rawcontent=message.getRawContent();  //the raw email
      var mail_rawcontent_lowercase=mail_rawcontent.toLowerCase();

      var is_spam=containsAny(mail_from,spam_from)
        || containsAny(mail_replyto,spam_replyto)
        || containsAny(mail_subject,spam_subject)
        || containsAny(mail_rawcontent,spam_raw)
        || containsAny(mail_rawcontent_lowercase,spam_raw_lowercase)
        || (containsAny(mail_rawcontent_lowercase,spam_amount) && containsAny(mail_rawcontent_lowercase,spam_currency));

      if (is_spam){
        message.moveToTrash();
      }
      //Received-SPF: softfail (google.com: domain of transitioning info@info.com does not designate 95.0.185.114 as permitted sender) client-ip=95.0.185.114;
    }
  }
}

Wednesday, May 31, 2017

Remove SPAM from gmail automatically 4


Time for a new one..
In this one I catch the SMTPIN_ADDED_MISSING@mx.google.com marker, which a normal mail doesn't appear to have.
Next to that, the Message-ID itself is separated from the mail source, so that it can be checked against the SMTPIN_... thingy.
A few domains are added as well. The code is changed to make it clearer which regexp is used.
Using several regexp expressions makes the code somewhat easier to maintain.
Getting this to work by using a database will be one of the future options. Sadly enough there are not many free databases for Google. Of course it is possible to store the data in a spreadsheet or in a simple text/csv file and read that.

Be aware if you haven't updated! I had used the try/finally construction from JavaScript, but this construction does not exist in Google Script. Because of this, MoveFromSpamToTrash did not work. So here is the new version.
2017-06-06: Another update, this is the latest version so far..

 

/**
 * Scans every message of every thread in the Gmail trash and permanently
 * deletes (Gmail advanced service) the messages that hit at least one rule:
 * a Message-ID domain pattern, the SMTPIN_ADDED_MISSING marker, or one of a
 * handful of sender / Reply-To / header / content checks.
 *
 * Every message is judged on its own, missing headers are tolerated, and a
 * mail is no longer deleted merely because it has a Message-ID.
 */
function RemoveSpamFromTrash(){
  //true when text contains at least one of the needles as a plain substring
  function containsAny(text,needles){
    for (var k=0;k<needles.length;k++){
      if (text.indexOf(needles[k])>=0){
        return true;
      }
    }
    return false;
  }

  //true when any entry of a String.match() result (may be null) contains needle
  function anyMatchContains(matches,needle){
    if (matches==null){
      return false;
    }
    for (var k=0;k<matches.length;k++){
      if (matches[k].indexOf(needle)!=-1){
        return true;
      }
    }
    return false;
  }

  //get the MessageID; "i" is for case insensitive, "g" returns every match
  var MessageID=RegExp("Message-ID: <.+>","ig");
  //compiled with "i" (a plain string passed to match() ignores the flags) and
  //with the dots really escaped; not global, so test() keeps no state
  var MessageIDBlock=RegExp("Message-ID: <[a-zA-Z0-9-@.]+(gq|ml|tk|px|cf|ga|jp|cf|cc|it|id|tr|gr|ec|ar|pe|co|ua|cr|hk|bn|in|br|id|it|mx|cn|life|atlis1|biz|glocal\\.net|gov\\.my|local|att\\.net|prod\\.outlook\\.com|onmicrosoft\\.com|localdomain|trade|cisco\\.com|co\\.in|arcamax\\.com|alice\\.it|co\\.nz|gov\\.br|telkom\\.net|vevida\\.net|global\\.net|[0-9.]+)>","i");
  var MessageReturnPath=RegExp("Return-Path: <.+>","ig");
  var MessageReceivedFrom=RegExp("Received: from .+ ","ig");

  //the header and the mail body are separated by an empty line (CR LF CR LF)
  var header_separator=String.fromCharCode(13,10,13,10);

  var spam_from=['ocn.ne.jp>','.ar>','cesterinevitably.com','domainandmarket.com','permanentbrain.com'];

  var threads= GmailApp.getTrashThreads();
  for (var i=0;i<threads.length;i++){
    var messages=threads[i].getMessages();
    //for all messages in the thread
    for (var m=0;m<messages.length;m++){
      var message=messages[m];
      var mail_replyto=message.getReplyTo();
      var mail_from=message.getFrom();
      var mail_rawcontent=message.getRawContent();  //the raw email
      var mail_rawcontent_lowercase=mail_rawcontent.toLowerCase();

      //get mail header; without an empty line the whole mail is header
      var header_end=mail_rawcontent.indexOf(header_separator);
      var mail_header=(header_end>=0) ? mail_rawcontent.substr(0,header_end) : mail_rawcontent;

      //match() returns null when the header is missing
      var matchMessageID=mail_header.match(MessageID);
      var mail_messageid=(matchMessageID!=null) ? matchMessageID[0] : "";

      //spammers often leave the Message-ID alone, so it shows a bad domain, e.g.
      //Message-ID: <3A03C937-BEA9-4589-867F-593CA3104153@qhgkoctono.ml>
      //Message-Id: <592b9af3.a284500a.1f2a9.e747SMTPIN_ADDED_MISSING@mx.google.com>
      //NOTE(review): '.cc' and '.at' are plain substrings - tighten if needed
      var is_spam=mail_messageid.indexOf('SMTPIN_ADDED_MISSING@mx.google.com')!=-1
        || MessageIDBlock.test(mail_header)
        || anyMatchContains(mail_header.match(MessageReceivedFrom),'.cc')
        || mail_replyto.indexOf('permanentbrain.com')!=-1
        || anyMatchContains(mail_header.match(MessageReturnPath),'.at')
        || containsAny(mail_from,spam_from)
        || mail_rawcontent_lowercase.indexOf('<http://bit.ly/')!=-1
        || mail_rawcontent.indexOf('If you do not wish to continue </em> receiving email newsletters')!=-1
        || mail_rawcontent_lowercase.indexOf('<http://tinyurl.com/')!=-1
        || mail_rawcontent.indexOf('.onmicrosoft.com>')!=-1;

      if (is_spam){
        Gmail.Users.Messages.remove('me', message.getId());
      }
      //Received-SPF: softfail (google.com: domain of transitioning info@info.com does not designate 95.0.185.114 as permitted sender) client-ip=95.0.185.114;
      //interesting part is here: google.com: ....  does not designate .... as permitted sender  we can use that to block the spammers as well
    }
  }
}


/**
 * Walks every thread in the Spam folder and moves to the Trash each message
 * that matches one of the hard-coded spam heuristics below (sender fragment,
 * subject keyword, raw-header marker or scam phrase in the raw content).
 *
 * Takes no parameters and returns nothing; its only effect is calling
 * moveToTrash() on the matching messages.
 *
 * NOTE: the match counter is kept per thread, so once one message of a thread
 * matches, every later message of that same thread is moved to the Trash too.
 */
function MoveFromSpamToTrash() {
  //fragments looked up (case-sensitive) in the From header
  var spamSenders=['.gov.cn','.ru','.xyz','.cf>','simplemailwork.com','probalsa.com.ec','.men'];
  //phrases looked up in the lower-cased subject
  var spamSubjects=['million ',' beneficiary',' nigeria','money gram'];
  //markers looked up (case-sensitive) in the raw message;
  //the msword content type is listed because doc attachments are not allowed
  var spamRawMarkers=['Return-Path: <>','User-Agent: Roundcube Webmail','X-CTCH-Spam:  Bulk','Received-SPF: softfail','Content-Type: application/msword;'];
  //phrases looked up in the lower-cased raw message
  var spamPhrases=['lottery','inheritance','nigeria','bank draft',' western union',' visa ',' master ',' atm card'];
  //an amount word only counts when a currency word is present as well
  var amountWords=[' million ',' thousand '];
  var currencyWords=[' usd',' dollar'];

  //true when text contains at least one of the needles
  function containsAny(text,needles){
    for (var n=0;n<needles.length;n++){
      if (text.indexOf(needles[n])!==-1){
        return true;
      }
    }
    return false;
  }

  //true when the message matches any of the heuristics
  function looksLikeSpam(message){
    var mail_from=message.getFrom();
    var mail_subject=message.getSubject().toLowerCase();
    var mail_rawcontent=message.getRawContent();  //the raw email
    var mail_rawcontent_lowercase=mail_rawcontent.toLowerCase();

    if (containsAny(mail_from,spamSenders)){
      return true;
    }
    //every phrase is compared against -1: a bare indexOf() result is truthy when the phrase is absent
    if (containsAny(mail_subject,spamSubjects)){
      return true;
    }
    if (containsAny(mail_rawcontent,spamRawMarkers)){
      return true;
    }
    if (containsAny(mail_rawcontent_lowercase,spamPhrases)){
      return true;
    }
    //checked as one combined condition so that an amount without a currency
    //does not stop the other checks from being evaluated
    return containsAny(mail_rawcontent_lowercase,amountWords) && containsAny(mail_rawcontent_lowercase,currencyWords);
  }

  var threads=GmailApp.getSpamThreads();
  for (var i=0;i<threads.length;i++){
    var messages=threads[i].getMessages();
    var spamcnt=0;  //per thread on purpose, see the note in the header
    for (var m=0;m<messages.length;m++){
      if (looksLikeSpam(messages[m])){
        spamcnt++;
      }
      if (spamcnt!==0){
        messages[m].moveToTrash();
      }
    }
  }
}

/**
 * Permanently deletes the trashed messages of every thread returned by the
 * search "in:trash", using the advanced Gmail service.
 *
 * Takes no parameters and returns nothing.
 *
 * Gmail.Users.Messages.remove() expects a message id, so the messages of each
 * thread are deleted one by one instead of handing it the thread id.
 */
function TrashMail() {
  var threads = GmailApp.search("in:trash");
  for (var i = 0; i < threads.length; i++) {
    var messages = threads[i].getMessages();
    for (var m = 0; m < messages.length; m++) {
      //a thread found by "in:trash" may presumably still hold messages that
      //are not trashed - TODO confirm; those must not be destroyed
      if (messages[m].isInTrash()) {
        Gmail.Users.Messages.remove('me', messages[m].getId());
      }
    }
  }
}

Wednesday, May 24, 2017

Remove SPAM from gmail automatically 3

Another version I made:
I am planning to improve it by using an Excel sheet where the filters will be defined.
Running a regex is fine but not everybody knows how to put a regex together.
So a future version will filter the mail header and mail body and then check the mail headers against the spreadsheet data. That way I don't need to create code in order to filter the mail. All I need then is to add data in my spreadsheet and it will filter on what is in the spreadsheet.
Fixed: code try finally to try catch

/**
 * Goes through every thread in the Trash and permanently deletes, through the
 * advanced Gmail service, each message that matches one of the heuristics
 * below: a Message-ID ending in a blocked domain, a blocked sender, or a
 * known spam fragment in the raw content.
 *
 * Takes no parameters and returns nothing. A failed delete is logged and the
 * scan continues with the next message.
 *
 * NOTE: the match counter is kept per thread, so once one message of a thread
 * matches, every later message of that same thread is deleted too.
 */
function RemoveSpamFromTrash(){
  //spammers often leave the Message-ID alone, so it shows whether the mail comes from a bad domain.
  //Examples:
  //Message-ID: <3A03C937-BEA9-4589-867F-593CA3104153@qhgkoctono.ml>
  //Message-ID: <vdyfn9e6uh7g7gx@u3afzwjx7iql4gx.local.px>
  //Written as a literal so that \. really is a literal dot, and without the
  //"g" flag so that no lastIndex state is carried over from one message to the next.
  // "i" is for case insensitive
  var regExpMessageID=/Message-ID: <[a-zA-Z0-9@.-]+(gq|ml|tk|px|cf|ga|jp|it|tr|gr|ec|ar|pe|co|ua|cr|hk|bn|br|mx|life|atlis1|gov\.my|cantv\.net|local|att\.net|prod\.outlook\.com|onmicrosoft\.com|localdomain|trade|cisco\.com|co\.in|arcamax\.com|alice\.it|gov\.br|telkom\.net|vevida\.net|[0-9.]+)>/i;

  //true when the message matches any of the heuristics
  function shouldDelete(message){
    var mail_from=message.getFrom();
    var mail_rawcontent=message.getRawContent();  //the raw email
    var mail_rawcontent_lowercase=mail_rawcontent.toLowerCase();

    if (regExpMessageID.test(mail_rawcontent)){
      return true;
    }
    //fragment that keeps showing up in generated spam
    if (mail_rawcontent.indexOf('ImwiOiA2NCwgInMiOiAwLCAidSI6IDI0NzY4ODQyMywgInQiOiAxLCAic2Qi')!==-1){
      return true;
    }
    if (mail_from.indexOf('ocn.ne.jp')!==-1 || mail_from.indexOf('domainandmarket.com')!==-1){
      return true;
    }
    if (mail_rawcontent_lowercase.indexOf('<http://bit.ly/')!==-1 || mail_rawcontent_lowercase.indexOf('<http://tinyurl.com/')!==-1){
      return true;
    }
    if (mail_rawcontent.indexOf('If you do not wish to continue </em> receiving email newsletters')!==-1){
      return true;
    }
    //onmicrosoft.com only counts together with a base64 encoded part
    return mail_rawcontent.indexOf('Content-Transfer-Encoding: base64')!==-1 && mail_rawcontent.indexOf('.onmicrosoft.com>')!==-1;
  }

  var threads=GmailApp.getTrashThreads();
  //for all threads
  for (var i=0;i<threads.length;i++){
    var messages=threads[i].getMessages();
    var delmsg=0;  //per thread on purpose, see the note in the header
    //for all messages in the thread
    for (var m=0;m<messages.length;m++){
      if (shouldDelete(messages[m])){
        delmsg++;
      }
      if (delmsg>0){
        var mail_id=messages[m].getId();
        try {
          Gmail.Users.Messages.remove('me', mail_id);
        } catch(e) {
          //best effort: report the failure and go on with the next message
          Logger.log('Could not delete message '+mail_id+': '+e);
        }
      }
    }
  }
}


/**
 * Walks every thread in the Spam folder and moves to the Trash each message
 * that matches one of the hard-coded spam heuristics below (sender fragment,
 * subject keyword, raw-header marker or scam phrase in the raw content).
 *
 * Takes no parameters and returns nothing; its only effect is calling
 * moveToTrash() on the matching messages.
 *
 * NOTE: the match counter is kept per thread, so once one message of a thread
 * matches, every later message of that same thread is moved to the Trash too.
 */
function MoveFromSpamToTrash() {
  //fragments looked up (case-sensitive) in the From header; '.men' also covers '.men>'
  var spamSenders=['.gov.cn','.ru','.xyz','.cf>','simplemailwork.com','probalsa.com.ec','.men'];
  //phrases looked up in the lower-cased subject
  var spamSubjects=['million ',' beneficiary',' nigeria','money gram'];
  //markers looked up (case-sensitive) in the raw message;
  //the msword content type is listed because doc attachments are not allowed
  var spamRawMarkers=['Return-Path: <>','User-Agent: Roundcube Webmail','X-CTCH-Spam:  Bulk','Received-SPF: softfail','Content-Type: application/msword;'];
  //phrases looked up in the lower-cased raw message
  var spamPhrases=['lottery','inheritance','nigeria','bank draft',' western union',' visa ',' master ',' atm card'];
  //an amount word only counts when a currency word is present as well
  var amountWords=[' million ',' thousand '];
  var currencyWords=[' usd',' dollar'];

  //true when text contains at least one of the needles
  function containsAny(text,needles){
    for (var n=0;n<needles.length;n++){
      //compare against -1: a bare indexOf() result is truthy when the needle is absent
      if (text.indexOf(needles[n])!==-1){
        return true;
      }
    }
    return false;
  }

  //true when the message matches any of the heuristics
  function looksLikeSpam(message){
    var mail_from=message.getFrom();
    var mail_subject=message.getSubject().toLowerCase();
    var mail_rawcontent=message.getRawContent();  //the raw email
    var mail_rawcontent_lowercase=mail_rawcontent.toLowerCase();

    if (containsAny(mail_from,spamSenders) || containsAny(mail_subject,spamSubjects)){
      return true;
    }
    if (containsAny(mail_rawcontent,spamRawMarkers) || containsAny(mail_rawcontent_lowercase,spamPhrases)){
      return true;
    }
    //checked as one combined condition so that an amount without a currency
    //does not stop the other checks from being evaluated
    return containsAny(mail_rawcontent_lowercase,amountWords) && containsAny(mail_rawcontent_lowercase,currencyWords);
  }

  var threads=GmailApp.getSpamThreads();
  for (var i=0;i<threads.length;i++){
    var messages=threads[i].getMessages();
    var spamcnt=0;  //per thread on purpose, see the note in the header
    for (var m=0;m<messages.length;m++){
      if (looksLikeSpam(messages[m])){
        spamcnt++;
      }
      if (spamcnt!==0){
        messages[m].moveToTrash();
      }
    }
  }
}


/**
 * Permanently deletes the trashed messages of every thread returned by the
 * search "in:trash", using the advanced Gmail service.
 *
 * Takes no parameters and returns nothing.
 *
 * Gmail.Users.Messages.remove() expects a message id, so the messages of each
 * thread are deleted one by one instead of handing it the thread id.
 */
function TrashMail() {
  var trashThreads = GmailApp.search("in:trash");
  for (var t = 0; t < trashThreads.length; t++) {
    var threadMessages = trashThreads[t].getMessages();
    for (var k = 0; k < threadMessages.length; k++) {
      var message = threadMessages[k];
      //a thread found by "in:trash" may presumably still hold messages that
      //are not trashed - TODO confirm; those must not be destroyed
      if (!message.isInTrash()) {
        continue;
      }
      Gmail.Users.Messages.remove('me', message.getId());
    }
  }
}

Monday, March 13, 2017

Remove SPAM from gmail automatically 2


One of the annoying things is that spammers learn new tricks.
My luck is that spammers are usually using generators to create loads of spam. 
This means that there are parts in the spam mail that always look similar. The trick is to find the similar parts and then simply filter that out. But do that well enough to filter out the proper emails. This usually can be done by finding long strings that are the same in the spam messages.

In this new version you can also delete messages in such a way that they immediately disappear. For this you need more advanced google script commands.

In order to get this to work you need to go to the advanced services for your script:


Then choose to enable the gmail api:
 
Then it says you also have to enable the services in the gmail api console:
Then choose Google API Console:
Pick out the GMail API and enable the methods gmail.users.messages.remove or take it simpler and enable all the gmail api methods.


 Then simply test your code by running RemoveTrashSpam.



My complete code so far (until I change it again). I found an error in the previous version so here another update..

Time for a well improved update!! 
Spammers now apparently use spam domains, domains they can get for free or a minor payment. Since these are almost always spam domains I simply block them all using a regex.

20170405: Minor update - added co.jp to the Message-ID blocklist
20170425: Minor update - added to the regex for the Message-ID, some spam comes from 192.3.something so this gets deleted as well.
20170427: Minor update - added .pe and .co domains, fixed some code like a.b which should be a\.b
20170601: Fixed: code try finally to try catch

/**
 * Walks every thread in the Spam folder and moves to the Trash each message
 * that matches one of the hard-coded spam heuristics below (sender fragment,
 * subject keyword, raw-header marker or scam phrase in the raw content).
 *
 * Takes no parameters and returns nothing; its only effect is calling
 * moveToTrash() on the matching messages.
 *
 * NOTE: the match counter is kept per thread, so once one message of a thread
 * matches, every later message of that same thread is moved to the Trash too.
 */
function FilterSpam() {
  //fragments looked up (case-sensitive) in the From header
  var spamSenders=['.gov.cn','.ru','.xyz','simplemailwork.com','probalsa.com.ec'];
  //phrases looked up in the lower-cased subject
  var spamSubjects=['million ',' beneficiary',' nigeria','money gram'];
  //markers looked up (case-sensitive) in the raw message;
  //the msword content type is listed because doc attachments are not allowed
  var spamRawMarkers=['Return-Path: <>','User-Agent: Roundcube Webmail','X-CTCH-Spam:  Bulk','Received-SPF: softfail','Content-Type: application/msword;'];
  //phrases looked up in the lower-cased raw message
  var spamPhrases=['lottery','inheritance','nigeria','bank draft',' western union',' visa ',' master ',' atm card'];
  //an amount word only counts when a currency word is present as well
  var amountWords=[' million ',' thousand '];
  var currencyWords=[' usd',' dollar'];

  //true when text contains at least one of the needles
  function containsAny(text,needles){
    for (var n=0;n<needles.length;n++){
      //compare against -1: a bare indexOf() result is truthy when the needle is absent
      if (text.indexOf(needles[n])!==-1){
        return true;
      }
    }
    return false;
  }

  //true when the message matches any of the heuristics
  function looksLikeSpam(message){
    var mail_from=message.getFrom();
    var mail_subject=message.getSubject().toLowerCase();
    var mail_rawcontent=message.getRawContent();  //the raw email
    var mail_rawcontent_lowercase=mail_rawcontent.toLowerCase();

    if (containsAny(mail_from,spamSenders)){
      return true;
    }
    if (containsAny(mail_subject,spamSubjects)){
      return true;
    }
    if (containsAny(mail_rawcontent,spamRawMarkers)){
      return true;
    }
    if (containsAny(mail_rawcontent_lowercase,spamPhrases)){
      return true;
    }
    //checked as one combined condition so that an amount without a currency
    //does not stop the other checks from being evaluated
    return containsAny(mail_rawcontent_lowercase,amountWords) && containsAny(mail_rawcontent_lowercase,currencyWords);
  }

  var threads=GmailApp.getSpamThreads();
  for (var i=0;i<threads.length;i++){
    //send to trash messages
    var messages=threads[i].getMessages();
    var spamcnt=0;  //per thread on purpose, see the note in the header
    for (var m=0;m<messages.length;m++){
      if (looksLikeSpam(messages[m])){
        spamcnt++;
      }
      if (spamcnt!==0){
        messages[m].moveToTrash();
      }
    }
  }
}


/**
 * Goes through every thread in the Trash and permanently deletes, through the
 * advanced Gmail service, each message that matches one of the heuristics
 * below: a Message-ID ending in a blocked domain, a blocked sender, or a
 * known spam fragment in the raw content.
 *
 * Takes no parameters and returns nothing. A failed delete is logged and the
 * scan continues with the next message.
 *
 * NOTE: the match counter is kept per thread, so once one message of a thread
 * matches, every later message of that same thread is deleted too.
 */
function RemoveTrashSpam(){
  //spammers often leave the Message-ID alone, so it shows whether the mail comes from a bad domain.
  //Examples:
  //Message-ID: <9E2BDE9F-0435-4038-AAE9-4E1B6681DD55@nyonwbeejb.cf>
  //Message-ID: <D6C74CE7-CFAD-4E1C-9430-3BEDED41D70D@zgyciflzz.gq>
  //Written as a literal so that \. really is a literal dot, and without the
  //"g" flag so that no lastIndex state is carried over from one message to the next.
  // "i" is for case insensitive
  var regExpMessageID=/Message-ID: <[a-zA-Z0-9@.-]+(gq|ml|tk|px|cf|ga|jp|it|tr|gr|ec|ar|pe|co|cantv\.net|local|prod\.outlook\.com|onmicrosoft\.com|localdomain|trade|cisco\.com|co\.in|arcamax\.com|alice\.it|gov\.br|[0-9.]+)>/i;

  //true when the message matches any of the heuristics
  function shouldDelete(message){
    var mail_from=message.getFrom();
    var mail_rawcontent=message.getRawContent();  //the raw email
    var mail_rawcontent_lowercase=mail_rawcontent.toLowerCase();

    if (regExpMessageID.test(mail_rawcontent)){
      return true;
    }
    //fragment that keeps showing up in generated spam
    if (mail_rawcontent.indexOf('ImwiOiA2NCwgInMiOiAwLCAidSI6IDI0NzY4ODQyMywgInQiOiAxLCAic2Qi')!==-1){
      return true;
    }
    if (mail_from.indexOf('ocn.ne.jp')!==-1){
      return true;
    }
    if (mail_rawcontent_lowercase.indexOf('<http://bit.ly/')!==-1 || mail_rawcontent_lowercase.indexOf('<http://tinyurl.com/')!==-1){
      return true;
    }
    if (mail_rawcontent.indexOf('If you do not wish to continue </em> receiving email newsletters')!==-1){
      return true;
    }
    //onmicrosoft.com only counts together with a base64 encoded part
    return mail_rawcontent.indexOf('Content-Transfer-Encoding: base64')!==-1 && mail_rawcontent.indexOf('.onmicrosoft.com>')!==-1;
  }

  var threads=GmailApp.getTrashThreads();
  //for all threads
  for (var i=0;i<threads.length;i++){
    var messages=threads[i].getMessages();
    var delmsg=0;  //per thread on purpose, see the note in the header
    //for all messages in the thread
    for (var m=0;m<messages.length;m++){
      if (shouldDelete(messages[m])){
        delmsg++;
      }
      if (delmsg>0){
        var mail_id=messages[m].getId();
        try {
          Gmail.Users.Messages.remove('me', mail_id);
        } catch(e) {
          //best effort: report the failure and go on with the next message
          Logger.log('Could not delete message '+mail_id+': '+e);
        }
      }
    }
  }
}