Monday, July 3, 2017

Remove SPAM from gmail automatically 6

In this latest version I just put the two main routines together in one. So RemoveFromFolder is now the main routine. This is called from RemoveFromTrash or RemoveFromSpam.
The first parameter of RemoveFromFolder selects which folder is scanned: either the TRASH folder (sometimes called BIN) or the SPAM folder. The second parameter tells it what to do with each matching message: delete it permanently or move it to the trash.
Of course, it makes little sense to move messages from the trash into the trash by calling RemoveFromFolder('TRASH','TRASH'). So here is the code...
2017-07-20: minor fix with matchSender


/**
 * Entry point: runs RemoveFromFolder on the TRASH folder with the
 * DELETE action.
 */
function RemoveFromTrash() {
  var folder = 'TRASH';
  var action = 'DELETE';
  RemoveFromFolder(folder, action);
}

/**
 * Entry point: runs RemoveFromFolder on the SPAM folder with the
 * DELETE action.
 */
function RemoveFromSpam() {
  var folder = 'SPAM';
  var action = 'DELETE';
  RemoveFromFolder(folder, action);
}

/**
 * Private helper: reports whether `text` contains at least one of the
 * substrings in `needles`.
 *
 * @param {string} text     String to search; a falsy value never matches.
 * @param {string[]} needles Substrings to look for.
 * @returns {boolean} true when any needle occurs in `text`.
 */
function containsAny_(text, needles) {
  if (!text) {
    return false;
  }
  for (var k = 0; k < needles.length; k++) {
    if (text.indexOf(needles[k]) >= 0) {
      return true;
    }
  }
  return false;
}

/**
 * Private helper: returns the first substring of `text` matched by
 * `pattern`, or '' when there is no match.
 *
 * @param {string} text
 * @param {RegExp} pattern
 * @returns {string}
 */
function firstMatch_(text, pattern) {
  var found = text.match(pattern);
  return found === null ? '' : found[0];
}

/**
 * Private helper: reports whether ANY substring of `text` matched by the
 * global `pattern` contains `needle`.
 *
 * String.match with a global pattern returns an array of matched strings,
 * so each element has to be searched individually; calling indexOf on the
 * array itself would only find an element that is exactly equal to `needle`.
 *
 * @param {string} text
 * @param {RegExp} pattern Must carry the "g" flag.
 * @param {string} needle
 * @returns {boolean}
 */
function anyMatchContains_(text, pattern, needle) {
  var found = text.match(pattern);
  if (found === null) {
    return false;
  }
  for (var n = 0; n < found.length; n++) {
    if (found[n].indexOf(needle) !== -1) {
      return true;
    }
  }
  return false;
}

/**
 * Private helper: returns the header part of a raw message, i.e. everything
 * before the first blank line. When no blank line is present the whole
 * content is treated as header.
 *
 * @param {string} rawContent Raw message text.
 * @returns {string}
 */
function extractHeader_(rawContent) {
  var boundary = rawContent.search(/\r?\n\r?\n/);
  return boundary === -1 ? rawContent : rawContent.substring(0, boundary);
}

/**
 * Private helper: decides whether a single message matches any spam rule.
 * Cheap checks run first so that the raw content and body are only fetched
 * when needed.
 *
 * @param {Object} message Message object providing getFrom(), getReplyTo(),
 *     getRawContent() and getBody().
 * @param {Object} rules   Substring lists keyed by from, replyTo, messageId,
 *     dkimSignature, sender and body.
 * @returns {boolean} true when at least one rule matches.
 */
function isSpamMessage_(message, rules) {
  if (containsAny_(message.getFrom(), rules.from)) {
    return true;
  }
  if (containsAny_(message.getReplyTo(), rules.replyTo)) {
    return true;
  }

  var raw = message.getRawContent();
  var header = extractHeader_(raw);

  // Spammers usually leave the Message-ID alone, so its domain is a good
  // indicator of where the mail really comes from.
  if (containsAny_(firstMatch_(header, /Message-ID: <.+>/i), rules.messageId)) {
    return true;
  }
  if (containsAny_(firstMatch_(header, /Sender: .+/i), rules.sender)) {
    return true;
  }

  // DKIM-Signature is a tag list (d=...; s=...;) that is normally folded
  // over several lines, so unfold continuation lines before matching.
  var unfoldedHeader = header.replace(/\r?\n[ \t]+/g, ' ');
  if (containsAny_(firstMatch_(unfoldedHeader, /DKIM-Signature: .+/i), rules.dkimSignature)) {
    return true;
  }

  if (anyMatchContains_(header, /Received: from .+ /ig, '.cc')) {
    return true;
  }
  if (anyMatchContains_(header, /Return-Path: <.+>/ig, '.at')) {
    return true;
  }

  var rawLowercase = raw.toLowerCase();
  if (rawLowercase.indexOf('<http://bit.ly/') !== -1) {
    return true;
  }
  if (rawLowercase.indexOf('<http://tinyurl.com/') !== -1) {
    return true;
  }
  if (raw.indexOf('.onmicrosoft.com>') !== -1) {
    return true;
  }

  // getBody() is checked last; the raw content does not need to be decoded
  // by hand for this (see the original author's note about base64).
  return containsAny_(message.getBody(), rules.body);
}

/**
 * Scans every message of every thread in the given Gmail folder and removes
 * the messages that match one of the spam rules below.
 *
 * Each message is judged on its own: a hit on one message does not cause the
 * other messages of the same thread to be removed.
 *
 * @param {string} FolderName 'TRASH' or 'SPAM' - the folder to scan.
 * @param {string} ActionType 'DELETE' removes a matching message through
 *     Gmail.Users.Messages.remove; 'TRASH' calls moveToTrash() on it.
 *     Any other value results in no action.
 * @throws {Error} when FolderName is neither 'TRASH' nor 'SPAM'.
 */
function RemoveFromFolder(FolderName, ActionType) {
  if (FolderName !== 'TRASH' && FolderName !== 'SPAM') {
    throw new Error("RemoveFromFolder: unknown FolderName '" + FolderName + "' (expected 'TRASH' or 'SPAM')");
  }
  if (ActionType !== 'DELETE' && ActionType !== 'TRASH') {
    // Nothing would be done with a match, so skip the scan entirely.
    return;
  }

  var threads = FolderName === 'TRASH' ? GmailApp.getTrashThreads() : GmailApp.getSpamThreads();

  // All spam rules are in here. Every entry is a plain substring.
  var rules = {
    from: [
      '.gov.cn>', '.ru>', '.xyz>', '.cf>', '.za>', '.domainandmarket.com>', '.com.ec>', '.men>', '.simplemailwork.com>', '.yahoo.co.jp>', '.edu>', '.edu.tw>', '.edu.pl>', '.uy>',
      '.cesterinevitably.com>', '.permanentbrain.com>', '.pro>', '.cisco.com>', '.gate01.com>', '.co.com>', '.amazonrewardonline.com>', '.kingdryer.com>', '.life>',
      '.perposed.com>', '.muysle.us>', '.telkomsa.net>', '.ocn.ne.jp>', '.onmicrosoft.edu>', '.cc>', '.marektbisniss.com>', '.cc>', '.offer.com>', 'emarketingit.com>',
      '.ws>', '.top>', '.ml>', '.us>', '.ga>', 'listtopcorp.com>'
    ],
    replyTo: ['.permanentbrain.com>'],
    messageId: [
      '.gov.cn>', '.ru>', '.xyz>', '.cf>', '.za>', '.domainandmarket.com>', '.probalsa.com.ec>', '.men>', '.simplemailwork.com>', '.edu.tw>', '.edu.pl>', '.cisco.com>',
      '.gq>', '.ml>', '.tk>', '.px>', '.cf>', '.ga>', '.jp>', '.cf>', '.cc>', '.it>', '.id>', '.tr>', '.gr>', '.ec>', '.ar>', '.pe>', '.co>', '.ua>', '.cr>', '.hk>', '.bn>', '.za>',
      '.in>', '.br>', '.id>', '.it>', '.mx>', '.cn>', '.life>', '.atlis1>', '.biz>', '.glocal.net>', '.gov.my>', '.local>', '.att.net>', '.prod.outlook.com>', '.onmicrosoft.com>',
      '.trade>', '.cisco.com>', '.co.in>', '.arcamax.com>', '.alice.it>', '.co.nz>', '.gov>', '.br>', '.telkom.net>', '.vevida.net>', '.global.net>', 'SMTPIN_ADDED_MISSING@mx.google.com>',
      '.prod.outlook.com>', '.betemail.com>', '.cisco.com>', '.openasiagroup.com>', '.vozoti.us>', '.itcrje.us>', '.dowjones.net>', '@email.amazonses.com>',
      '.localdomain>', '.es>', 'co.id'
    ],
    // NOTE(review): this list is kept for reference only. The check that used
    // it was disabled in the original script and is still not applied.
    returnPath: ['sendgrid.net>', 'ikexpress.com>', 'ddadr.com>', '1and1.fr>', '.com.co>'],
    dkimSignature: ['.ccsend.com;'],
    sender: ['.mandrillapp.com>'],
    body: ['.dma.trade/']
  };

  for (var i = 0; i < threads.length; i++) {
    var messages = threads[i].getMessages();
    for (var m = 0; m < messages.length; m++) {
      if (!isSpamMessage_(messages[m], rules)) {
        continue;
      }
      if (ActionType === 'DELETE') {
        Gmail.Users.Messages.remove('me', messages[m].getId());
      } else {
        messages[m].moveToTrash();
      }
    }
  }
}


/**
 * Permanently removes the trashed messages of every thread returned by the
 * "in:trash" search, using Gmail.Users.Messages.remove.
 *
 * The message-level remove call needs a message ID, so the messages of each
 * thread are walked individually instead of passing the thread's ID. Only
 * messages that report isInTrash() are removed, so other messages in the
 * same thread are left untouched.
 */
function TrashMail() {
  var threads = GmailApp.search("in:trash");
  for (var i = 0; i < threads.length; i++) {
    var messages = threads[i].getMessages();
    for (var j = 0; j < messages.length; j++) {
      if (messages[j].isInTrash()) {
        Gmail.Users.Messages.remove('me', messages[j].getId());
      }
    }
  }
}


Since Google is changing to OAuth, the procedure will not work without setting up the proper APIs and OAuth settings. Apparently this doesn't influence the working of the script, as far as I can see...

Well, I was wrong about this... It does still work on my account. My mailbox is automatically cleaned according to my filters.

No comments:

Post a Comment

Please leave nonsense comments out.