Wednesday, May 31, 2017

Remove SPAM from gmail automatically 4


Time for a new one..
In this version I catch Message-IDs ending in SMTPIN_ADDED_MISSING@mx.google.com, which a normal mail doesn't appear to have.
In addition, the Message-ID itself is now separated from the mail source, so that it can be checked against the SMTPIN_... marker.
A few domains are added as well. The code is changed to make it more clear what the regexp used is.
Using several regExp expressions makes the code somewhat easier to maintain.
Getting this to work with a database is one of the future options. Sadly, there are not many free databases for Google. Of course, it is possible to store the data in a spreadsheet or in a simple text/CSV file and read that.

Be aware if you haven't updated! I had used the try/finally construction from JavaScript, but this construction does not exist in Google Apps Script. As a result, MoveFromSpamToTrash did not work. So here is the new version.
2017-06-06: Another update, this is the latest version so far..

 

/**
 * Permanently deletes messages in the Gmail trash that look like spam.
 *
 * Every message of every trash thread is judged on its own header block,
 * sender, reply-to address and raw content. A message is deleted as soon as
 * one indicator matches; a hit on one message does not affect the other
 * messages of the same thread. Deletion goes through the advanced Gmail
 * service (Gmail.Users.Messages.remove) and cannot be undone.
 *
 * Takes no parameters and returns nothing. Errors raised by the Gmail
 * services are not caught.
 */
function RemoveSpamFromTrash(){
  // Message-ID that ends in one of the blocked domains/TLDs, or in digits and dots.
  // Examples this is meant to catch:
  //   Message-ID: <3A03C937-BEA9-4589-867F-593CA3104153@qhgkoctono.ml>
  //   Message-ID: <vdyfn9e6uh7g7gx@u3afzwjx7iql4gx.local.px>
  var blockedMessageId=/Message-ID: <[a-zA-Z0-9@.-]+(?:gq|ml|tk|px|cf|ga|jp|cc|it|id|tr|gr|ec|ar|pe|co|ua|cr|hk|bn|in|br|mx|cn|life|atlis1|biz|glocal\.net|gov\.my|local|att\.net|prod\.outlook\.com|onmicrosoft\.com|localdomain|trade|cisco\.com|co\.in|arcamax\.com|alice\.it|co\.nz|gov\.br|telkom\.net|vevida\.net|global\.net|[0-9.]+)>/i;
  // Example: Message-Id: <592b9af3.a284500a.1f2a9.e747SMTPIN_ADDED_MISSING@mx.google.com>
  var missingMessageId=/Message-ID: <.+SMTPIN_ADDED_MISSING@mx\.google\.com>/i;
  var receivedFrom=/Received: from .+ /gi;
  var returnPath=/Return-Path: <.+>/gi;

  var blockedSenders=['ocn.ne.jp>','.ar>','cesterinevitably.com','domainandmarket.com','permanentbrain.com'];
  var blockedLinks=['<http://bit.ly/','<http://tinyurl.com/'];  //compared against the lowercased raw mail
  var blockedRawText=['If you do not wish to continue </em> receiving email newsletters','.onmicrosoft.com>'];

  // True when text contains at least one of the given substrings.
  function containsAny(text,needles){
    for (var n=0;n<needles.length;n++){
      if (text.indexOf(needles[n])!==-1){
        return true;
      }
    }
    return false;
  }

  // True when any match of pattern in text contains needle.
  // match() returns null when nothing matches, hence the fallback to [].
  function anyMatchContains(text,pattern,needle){
    var found=text.match(pattern)||[];
    return containsAny(found.join('\n'),[needle]);
  }

  // The header block is everything before the first empty line (see RFC 2045).
  // When there is no empty line the whole mail is treated as header.
  function headerOf(rawContent){
    var headerEnd=rawContent.search(/\r?\n\r?\n/);
    return headerEnd===-1 ? rawContent : rawContent.substring(0,headerEnd);
  }

  // Applies every spam indicator to a single message.
  function isSpam(message){
    var rawContent=message.getRawContent();
    var header=headerOf(rawContent);
    var from=message.getFrom()||'';
    var replyTo=message.getReplyTo()||'';

    return missingMessageId.test(header)
      || blockedMessageId.test(header)
      || anyMatchContains(header,receivedFrom,'.cc')
      || anyMatchContains(header,returnPath,'.at')
      || replyTo.indexOf('permanentbrain.com')!==-1
      || containsAny(from,blockedSenders)
      || containsAny(rawContent.toLowerCase(),blockedLinks)
      || containsAny(rawContent,blockedRawText);
  }

  var threads=GmailApp.getTrashThreads();
  for (var i=0;i<threads.length;i++){
    var messages=threads[i].getMessages();
    for (var m=0;m<messages.length;m++){
      // The delete is irreversible, so skip any message of the thread that is
      // not itself in the trash.
      if (messages[m].isInTrash() && isSpam(messages[m])){
        Gmail.Users.Messages.remove('me', messages[m].getId());
      }
    }
  }
}


/**
 * Moves messages from the Gmail spam folder to the trash when they match one
 * of the spam indicators below.
 *
 * Each message of each spam thread is judged on its own sender, subject and
 * raw content; one matching indicator is enough. Messages that match nothing
 * are left in the spam folder.
 *
 * Takes no parameters and returns nothing. Errors raised by GmailApp are not
 * caught.
 */
function MoveFromSpamToTrash() {
  //substrings of the From header
  var senderMarkers=['.gov.cn','.ru','.xyz','.cf>','simplemailwork.com','probalsa.com.ec','.men'];
  //substrings of the lowercased subject
  var subjectMarkers=['million ',' beneficiary',' nigeria','money gram'];
  //case-sensitive substrings of the raw mail
  var rawMarkers=[
    'Return-Path: <>',
    'User-Agent: Roundcube Webmail',  //filter out spam mailer
    'X-CTCH-Spam:  Bulk',
    'Received-SPF: softfail',
    'Content-Type: application/msword;'  //doc attachments are not allowed
  ];
  //substrings of the lowercased raw mail
  var rawLowerMarkers=['lottery','inheritance','nigeria','bank draft',' western union',' visa ',' master ',' atm card'];
  //an amount only counts when a currency is mentioned as well
  var amountMarkers=[' million ',' thousand '];
  var currencyMarkers=[' usd',' dollar'];

  // True when text contains at least one of the given substrings.
  function containsAny(text,needles){
    for (var n=0;n<needles.length;n++){
      if (text.indexOf(needles[n])!==-1){
        return true;
      }
    }
    return false;
  }

  // Applies every spam indicator to a single message.
  function isSpam(message){
    var from=message.getFrom()||'';
    var subject=(message.getSubject()||'').toLowerCase();
    var rawContent=message.getRawContent();
    var rawLower=rawContent.toLowerCase();

    return containsAny(from,senderMarkers)
      || containsAny(subject,subjectMarkers)
      || containsAny(rawContent,rawMarkers)
      || containsAny(rawLower,rawLowerMarkers)
      || (containsAny(rawLower,amountMarkers) && containsAny(rawLower,currencyMarkers));
  }

  var threads=GmailApp.getSpamThreads();
  for (var i=0;i<threads.length;i++){
    var messages=threads[i].getMessages();
    for (var m=0;m<messages.length;m++){
      if (isSpam(messages[m])){
        messages[m].moveToTrash();
      }
    }
  }
}

/**
 * Permanently deletes every message that is currently in the Gmail trash.
 *
 * Threads are found with the search "in:trash". Gmail.Users.Messages.remove
 * expects a message id, so each thread is expanded into its messages and the
 * messages are deleted one by one. Messages of a matching thread that are not
 * themselves in the trash are left alone.
 *
 * Takes no parameters and returns nothing. The delete cannot be undone and
 * errors raised by the Gmail services are not caught.
 */
function TrashMail() {
  var threads = GmailApp.search("in:trash");
  for (var i = 0; i < threads.length; i++) {
    var messages = threads[i].getMessages();
    for (var m = 0; m < messages.length; m++) {
      if (messages[m].isInTrash()) {
        Gmail.Users.Messages.remove('me', messages[m].getId());
      }
    }
  }
}

No comments:

Post a Comment

Please leave nonsense comments out.